library(reshape2)
library(estimatr)
library(plyr)
library(foreign)
library(ltm)
library(MCMCpack)
library(wfe)
library(devtools)
library(panelView)
library(lme4)
library(arm)
library(Matching)
library(rgenoud)
library(cem)
library(quickmatch)
library(zoo)
library(dplyr)
library(did)
library(gsynth)
library(DataCombine)
library(broom)
library(stargazer)
library(parallel)
library(margins)
library(ggplot2)
library(readstata13)

#
gc()



# # Calculate the number of cores
# num_cores <- detectCores() - 1
# 
# # Initiate cluster
# cl <- makeCluster(num_cores)
gc()

###State FIPS codes
fips_codes <- read.csv("fips_codes_website.csv")


data <- readRDS("policy_data_updated.RDS")

##Load New Early Voting Data

ev <- read.csv("Early Voting Coding - Sheet1.csv")

data$early_voting_narrow
data$early_voting_broad

data <- join(data, ev[,c("state","year_eip","year_amv","year_abs")])

for(i in levels(factor(data$state))){
  
  data$early_voting_person[data$state==i & 
                             (data$year>=data$year_eip) & 
                             data$year<=2014] <- 1
  
  data$early_voting_narrow[data$state==i & 
                             (data$year>=data$year_eip|data$year>=data$year_amv) & 
                             data$year<=2014] <- 1
  data$early_voting_broad[data$state==i & 
                            (data$year>=data$year_eip|data$year>=data$year_amv|data$year>=data$year_abs) & 
                            data$year<=2014] <- 1
  
}

data$early_voting_narrow[is.na(data$early_voting_narrow) & data$year<=2014] <- 0
data$early_voting_broad[is.na(data$early_voting_broad) & data$year<=2014] <- 0
data$early_voting_person[is.na(data$early_voting_person) & data$year<=2014] <- 0


presidential_years <- c(1976,1980,1984,1988,1992,1996,2000,2004,2008,2012,2016)


#Fowler Replication

fowler <- read.dta("fowler_replication_data.dta", convert.factors = F)

names(fowler)

fowler$age_group[fowler$age<25] <- "18-24"
fowler$age_group[fowler$age>=25 & fowler$age<35] <- "25-34"
fowler$age_group[fowler$age>=35 & fowler$age<45] <- "35-44"
fowler$age_group[fowler$age>=45 & fowler$age<55] <- "45-54"
fowler$age_group[fowler$age>=55 & fowler$age<65] <- "55-64"
fowler$age_group[fowler$age>=65] <- "65+"

fowler$year <- fowler$election
fowler$st <- fowler$state

fowler_agg <- aggregate(cbind(voted, pop) ~ age_group + st + year, fowler, mean, na.rm=T)

fowler_sdr <- join(fowler_agg, data, by=c("st","year"))

fowler_sdr$turnout <- (fowler_sdr$voted/fowler_sdr$pop)*100
fowler_sdr <- cbind(fowler_sdr, data.frame(model.matrix(~ age_group - 1, data=fowler_sdr)))

fowler_sdr$age_group_ref <- relevel(factor(fowler_sdr$age_group), ref = 6)

lm1 <- lm_robust(turnout ~ sdr*age_group_ref +
            factor(st) + factor(year), fowler_sdr,
          clusters = st, se_type = "stata")

m <- margins(lm1, at = list(age_group_ref=levels(fowler_sdr$age_group_ref)))

plotdata <- data.frame(summary(m))
plotdata <- plotdata[plotdata$factor=="sdr",]

plotdata$age_group <- c("65+", "18\n-\n24", "25\n-\n34", "35\n-\n44", "45\n-\n54", "55\n-\n64")

setwd("./Output")

pdf("fowler_did_bv.pdf", h=4, w=4)
ggplot(plotdata, aes(x=age_group, y=AME)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower, ymax=upper), width=0) +
  geom_hline(yintercept=0, linetype=2) +
  xlab("Age Group") +
  ylab("Estimate") +
  scale_y_continuous(breaks=c(-5, 0, 5), limits=c(-5,7.5)) +
  theme_bw()
dev.off()

setwd("..")

######################################Load and Clean CPS Individual Data######################################

cps <- read.csv("cps_00021.csv"); gc()

names(cps) <- tolower((names(cps)))

#Removing "refused to answer"
cps$voted_alt <- cps$voted - 1
cps$voted_alt[cps$voted_alt>1] <- NA

#Main dependent variable
cps$voted[cps$voted==99] <- NA
cps$voted <- cps$voted - 1
cps$voted[cps$voted>1] <- 0

cps$faminc[cps$faminc>843] <- NA
cps$faminc <- as.numeric(factor(cps$faminc))

cps$sex[cps$sex>2] <- NA

cps$educ[cps$educ==999|cps$educ==1] <- NA

cps$educ <- as.numeric(factor(cps$educ))

#merge state FIPS codes
cps <- join(cps, fips_codes[,c("st","statefip")], match="first")

#Merge state policy data
cps <- join(cps, data, by=c("st","year")); gc()

cps$age_group[cps$age<=24] <- "18-24"
cps$age_group[cps$age>24 & cps$age<35] <- "25-34"
cps$age_group[cps$age>34 & cps$age<45] <- "35-44"
cps$age_group[cps$age>44 & cps$age<55] <- "45-54"
cps$age_group[cps$age>54 & cps$age<65] <- "55-64"
cps$age_group[cps$age>64] <- "65+"

cps$presidential_year[cps$year %in% presidential_years] <- 1
cps$presidential_year[(cps$year %in% presidential_years)==F] <- 0

cps$sex[cps$sex==0] <- NA
cps$sex <- cps$sex - 1

cps$race_5[cps$race==100] <- "White"
cps$race_5[cps$race==200] <- "Black"
cps$race_5[cps$race==300 | cps$race>=700] <- "Other"
cps$race_5[(cps$race>=600 & cps$race<700) | cps$race==809] <- "Asian"
cps <- within(cps, race_5 <- relevel(factor(race_5), ref = "White"))
cps <- cps[cps$year>=1978,]

cps$asian[cps$race_5=="Asian"] <- 1
cps$asian[cps$race_5!="Asian"] <- 0


gc()

#age_group dummy variables
cps <- cbind(cps, data.frame(model.matrix(~ age_group - 1, data=cps)))

#race dummy variables
cps <- cbind(cps, data.frame(model.matrix(~ as.character(race) - 1, data=cps)))
names(cps) <- gsub("as.character.", "", names(cps))

#registration
cps$reg_pollingplace[cps$voreghow<99 & cps$voreghow!=7] <- 0
cps$reg_pollingplace[cps$voreghow<99 & cps$voreghow==7] <- 1

cps$vote_reg[cps$voreg<=98 & cps$voreg!=2] <- 0
cps$vote_reg[cps$voreg<=98 & cps$voreg==2] <- 1


gc()

#
cps$movedless1yr[cps$voteres<14] <- 1
cps$movedless1yr[cps$voteres>=14 & cps$voteres<900] <- 0

#Remove DC
cps <- cps[cps$st!="DC",]; gc()


####Change Working Directory to output tables & plots
setwd("./Output")



##############################Descriptive Plots##############################
paneldata <- data[data$year>=1978,c("st","year","sdr","early_voting_narrow", "early_voting_broad", "early_voting_person", "pop_annual")]
paneldata$st <- as.character(paneldata$st)


pdf("PanelView - SDR (new).pdf", h=7, w=12)
panelView(D="sdr", 
          data=paneldata,
          #D="sdr",
          #by.timing = T,
          pre.post = TRUE,
          #data=paneldata,
          index=c("st","year"),
          xlab="Year",
          ylab="State",
          main="SDR",
          legend.labs = c("Control Observations", "Treated Observations"),
          color=c("white","black"),
          theme.bw = T)
dev.off()


pdf("PanelView - Early Voting (person).pdf", h=7, w=12)
panelView(D="early_voting_person",
          data=paneldata,
          index=c("st","year"),
          xlab="Year",
          ylab="State",
          main="Early Voting (in Person)",
          legend.labs = c("Control Observations", "Treated Observations"),
          color=c("white","black"),
          theme.bw = T)
dev.off()


pdf("PanelView - Early Voting (narrow).pdf", h=7, w=12)
panelView(D="early_voting_narrow",
          data=paneldata,
          index=c("st","year"),
          xlab="Year",
          ylab="State",
          main="Early Voting (Preferred Def.)",
          legend.labs = c("Control Observations", "Treated Observations"),
          color=c("white","black"),
          theme.bw = T)
dev.off()

pdf("PanelView - Early Voting (broad).pdf", h=7, w=12)
panelView(D="early_voting_broad",
          data=paneldata,
          index=c("st","year"),
          xlab="Year",
          ylab="State",
          main="Early Voting (Broad Def.)",
          legend.labs = c("Control Observations", "Treated Observations"),
          color=c("white","black"),
          theme.bw = T)
dev.off()


plotdata_wide <- aggregate(cbind(faminc, age, educ, race.100,race.200,asian) ~ sdr + year, cps[cps$year>=1982,], mean, na.rm=T)

plotdata_wide$race.100 <- plotdata_wide$race.100*100
plotdata_wide$race.200 <- plotdata_wide$race.200*100
plotdata_wide$asian <- plotdata_wide$asian*100


plotdata <- reshape(plotdata_wide, 
                    varying = names(plotdata_wide)[3:length(names(plotdata_wide))], 
                    v.names = "mean",
                    timevar = "variable", 
                    times = names(plotdata_wide)[3:length(names(plotdata_wide))],
                    direction = "long")

plotdata$variable[plotdata$variable=="race.100"] <- "White"
plotdata$variable[plotdata$variable=="race.200"] <- "Black"
plotdata$variable[plotdata$variable=="asian"] <- "Asian"
plotdata$variable[plotdata$variable=="faminc"] <- "Income"
plotdata$variable[plotdata$variable=="age"] <- "Age"
plotdata$variable[plotdata$variable=="educ"] <- "Education"

pdf("descriptive_demographics_byyear.pdf", h=5, w=7)
ggplot(plotdata, aes(x=year, y=mean, group=interaction(variable, sdr), shape=factor(sdr), color=factor(sdr))) +
  geom_line(size=1) +
  #geom_point() +
  facet_wrap(~variable) +
  scale_color_manual(values=c("grey","black"), name="SDR", labels=c("No", "Yes")) +
  xlab("Year") +
  ylab("") +
  theme_classic()
dev.off()

rm(plotdata, plotdata_wide); gc()


#########################################################################################################
################################################ANALYSIS#################################################
#########################################################################################################


################Figure 2 (bivariate)
agg_cps_age_sdr <- aggregate(voted ~ sdr + age_group, cps, mean, na.rm=T)
write.csv(agg_cps_age_sdr, "agg_cps_age_sdr.csv", na="", row.names = F)

pd <- position_dodge(.75)

pdf("bivariate_plot_age.pdf", h=4, w=5)
ggplot(agg_cps_age_sdr, aes(y=voted, x=age_group, fill=factor(sdr)))+
  geom_bar(width = .75, stat = "identity", position=pd, color="black") + 
  scale_fill_manual(values=c("grey","black"), labels=c("non-SDR", "SDR"), name="") +
  ylab("Pr(Voted)") +
  xlab("Age") +
  theme_classic()
dev.off()

################Appendix Figure (presidential, bivariate)

agg_cps_age_presidential_sdr <- aggregate(voted ~ sdr + age_group + presidential_year, cps, mean, na.rm=T)

agg_cps_age_presidential_sdr$prez[agg_cps_age_presidential_sdr$presidential_year==1] <- "Presidential"
agg_cps_age_presidential_sdr$prez[agg_cps_age_presidential_sdr$presidential_year==0] <- "Non-Presidential"

pdf("bivariate_plot_age_POTUS.pdf", h=4, w=8)
ggplot(agg_cps_age_presidential_sdr, aes(y=voted, x=age_group, fill=factor(sdr)))+
  geom_bar(width = .75, stat = "identity", position=pd, color="black") + 
  scale_fill_manual(values=c("grey","black"), labels=c("non-SDR", "SDR"), name="") +
  ylab("Pr(Voted)") +
  xlab("Age") +
  facet_wrap(~prez) +
  theme_classic()
dev.off()





################################################
################Diff in Diff####################         THE MAIN ANALYSIS
################################################

#########Aggregating#############
gc()

agg_cps <- aggregate(voted ~ state + year + sdr + early_voting_narrow + early_voting_broad + presidential_year, cps, mean, na.rm=T)
agg_cps_age <- aggregate(voted ~ state + year + sdr + presidential_year + age_group + 
                           povrate + pct_black_i + pct_white_i, cps, mean, na.rm=T)

cps$white[cps$race.100==1] <- 1
cps$white[cps$race.100!=1] <- 0
cps$black[cps$race.200==1] <- 1
cps$black[cps$race.200!=1] <- 0

cps$asian[cps$race>200 & cps$race<800] <- 1
cps$asian[cps$race<=200 | cps$race>=800] <- 0

race_means_agg_cps_age <- aggregate(cbind(white, black, asian, faminc, educ) ~ state + year + age_group, cps, mean, na.rm=T)

agg_cps_age <- join(agg_cps_age, race_means_agg_cps_age)


agg_cps_age <- agg_cps_age[order(agg_cps_age$state, agg_cps_age$age_group, agg_cps_age$year),]



################Aggregate Diff in Diff Specifications################

output <- list()

did_table_list <- list()

for(j in c("sdr")){
  
  agg_cps_age$temp_var <- agg_cps_age[,"sdr"]
  
  output_wfe <- list()
  output_ols <- list()
  output_wfe_controls <- list()
  output_ols_controls <- list()
  
  for(i in levels(factor(agg_cps_age$age_group))){
    
    tempdata <- agg_cps_age[agg_cps_age$age_group==i,]
    
    tempdata$state <- as.character(tempdata$state)
    
    temp <- wfe(voted ~ temp_var, tempdata, treat = "temp_var",
                unit.index="state", time.index = "year", method = "unit",
                qoi = "ate", estimator = "did", C.it = NULL,
                hetero.se = TRUE, auto.se = TRUE, #df.adjustment = F,
                White = TRUE, White.alpha = 0.05,
                verbose = T, unbiased.se = T, unweighted = FALSE,
                store.wdm = FALSE, maxdev.did = NULL,
                tol = sqrt(.Machine$double.eps))
    
    wfe_temp <- matrix(nrow = 4, ncol=5)
    wfe_temp[1,1] <- j
    wfe_temp[1,2] <- temp$coefficients
    wfe_temp[1,3] <- temp$se
    wfe_temp[1,4] <- i
    wfe_temp[1,5] <- "WFE"
    wfe_temp <- data.frame(wfe_temp)
    names(wfe_temp) <- c("term", "estimate", "std.error", "group", "model")
    output_wfe[[i]] <- wfe_temp[complete.cases(wfe_temp),]
    
    temp <- tidy(lm(voted ~ temp_var + presidential_year + factor(state) + factor(year), tempdata))
    cluster_se_temp <- as.numeric(commarobust(lm(voted ~ temp_var + presidential_year + factor(state) + factor(year), tempdata),
                                              se_type="HC1")$std.error)
    temp$std.error <- cluster_se_temp[!is.na(cluster_se_temp)]
    
    temp$group <- i
    temp$term[temp$term=="temp_var"] <- j
    temp$model <- "OLS"
    output_ols[[i]] <- temp[temp$term==j,]
    if(j=="sdr"){did_table_list[[i]] <- temp}
    
    temp <- tidy(lm(voted ~ temp_var + white + black + asian + povrate + educ + presidential_year + factor(state) + factor(year), tempdata))
    cluster_se_temp <- as.numeric(commarobust(lm(voted ~ temp_var + white + black + asian + povrate + educ + presidential_year + factor(state) + factor(year), tempdata),
                                              se_type="HC1")$std.error)
    temp$std.error <- cluster_se_temp[!is.na(cluster_se_temp)]
    
    temp$group <- i
    temp$term[temp$term=="temp_var"] <- j
    temp$model <- "OLS (controls)"
    output_ols_controls[[i]] <- temp[temp$term==j,]
    
    if(j=="sdr"){did_table_list[[i]] <- rbind(did_table_list[[i]], temp)}
    
    gc()
    
  }
  
  output[[j]] <- do.call(rbind.fill, c(output_wfe, 
                                       output_ols, 
                                       output_ols_controls))
  
}
did_table <- do.call(rbind.fill, c(did_table_list))

did_table$model[did_table$model=="OLS"] <- "Bivariate"
did_table$model[did_table$model=="OLS (controls)"] <- "Controls"


did_table[,c("estimate", "std.error", "statistic","p.value")] <- round(did_table[,c("estimate", "std.error", "statistic","p.value")], digits=2)

write.csv(did_table[grepl("factor", did_table$term)==F,], "did_tables_statelevel_full.csv", na="", row.names=F)


plotdata <- do.call(rbind.fill, c(output))

plotdata[, 2:3] <- apply(plotdata[, 2:3], 2, function(x) as.numeric(as.character(x)))
plotdata[, 2:3] <- plotdata[, 2:3]*100
plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)

plotdata$term[plotdata$term=="sdr"] <- "Same Day Reg."
plotdata$term[plotdata$term=="early_voting_narrow"] <- "Early Voting (Restrictive Def.)"
plotdata$term[plotdata$term=="early_voting_broad"] <- "Early Voting (Broad Def.)"
plotdata$term[plotdata$term=="voterid"] <- "Voter ID"

plotdata$Model <- plotdata$model

write.csv(plotdata, "diffindiff_estimates_cps5.csv", na="")


################Individual Level Diff in Diff################
rm(list=ls(pattern="indiv"))
rm(list=ls(pattern="mlm"))
gc()

age_groups <- c("65+", "18\n-\n24","25\n-\n34","35\n-\n44","45\n-\n54","55\n-\n64")



#SDR

indiv_sdr <- lm_robust(voted ~ sdr*age_group18.24 + 
                         sdr*age_group25.34 + 
                         sdr*age_group35.44 + 
                         sdr*age_group45.54 + 
                         sdr*age_group55.64 + 
                         factor(race) + sex + faminc + educ + factor(year) + factor(state),
                       clusters=state, se_type="stata",
                       cps)

indiv_sdr_tidy <- estimatr::tidy(indiv_sdr)
indiv_sdr_tidy$Model <- "Individual Level\n(controls)"
indiv_sdr_tidy <- indiv_sdr_tidy[grepl("sdr", indiv_sdr_tidy$term),]
indiv_sdr_tidy$age_group <- age_groups
indiv_sdr_tidy$treat <- "SDR"

indiv_sdr_bv <- lm_robust(voted ~ sdr*age_group18.24 + 
                            sdr*age_group25.34 + 
                            sdr*age_group35.44 + 
                            sdr*age_group45.54 + 
                            sdr*age_group55.64 + 
                            factor(year) + factor(state), 
                          clusters=state, se_type="stata",
                          cps)

indiv_sdr_bv_tidy <- estimatr::tidy(indiv_sdr_bv)
indiv_sdr_bv_tidy$Model <- "Individual Level\n(bivariate)"
indiv_sdr_bv_tidy <- indiv_sdr_bv_tidy[grepl("sdr", indiv_sdr_bv_tidy$term),]
indiv_sdr_bv_tidy$age_group <- age_groups
indiv_sdr_bv_tidy$treat <- "SDR"

did_table <- rbind(estimatr::tidy(indiv_sdr_bv), estimatr::tidy(indiv_sdr))
did_table[,c("estimate", "std.error", "statistic", "p.value")] <- round(did_table[,c("estimate", "std.error", "statistic", "p.value")], digits=2)

write.csv(did_table, "did_tables_indivlevel_full.csv")



#SDR with three state-year FEs

cps$decade[cps$year<=1992] <- "1992"
cps$decade[cps$year>=1994 & cps$year<=2004] <- "2004"
cps$decade[cps$year>=2006] <- "2018"
cps$state_decade <- paste(cps$st, cps$decade, sep="_")

indiv_sdr_sdFE <- lm_robust(voted ~ sdr*age_group18.24 + 
                              sdr*age_group25.34 + 
                              sdr*age_group35.44 + 
                              sdr*age_group45.54 + 
                              sdr*age_group55.64 + 
                              factor(race) + sex + faminc + educ + 
                              factor(state_decade),
                            clusters=state, se_type="stata",
                            cps)

indiv_sdr_sdFE_tidy <- estimatr::tidy(indiv_sdr_sdFE)
indiv_sdr_sdFE_tidy$Model <- "Individual Level\n(controls, state-decade FEs)"
indiv_sdr_sdFE_tidy <- indiv_sdr_sdFE_tidy[grepl("sdr", indiv_sdr_sdFE_tidy$term),]
indiv_sdr_sdFE_tidy$age_group <- age_groups
indiv_sdr_sdFE_tidy$treat <- "SDR"

indiv_sdr_sdFE_bv <- lm_robust(voted ~ sdr*age_group18.24 + 
                                 sdr*age_group25.34 + 
                                 sdr*age_group35.44 + 
                                 sdr*age_group45.54 + 
                                 sdr*age_group55.64 + 
                                 factor(state_decade), 
                               clusters=state, se_type="stata",
                               cps)

indiv_sdr_sdFE_bv_tidy <- estimatr::tidy(indiv_sdr_sdFE_bv)
indiv_sdr_sdFE_bv_tidy$Model <- "Individual Level\n(bivariate, state-decade FEs)"
indiv_sdr_sdFE_bv_tidy <- indiv_sdr_sdFE_bv_tidy[grepl("sdr", indiv_sdr_sdFE_bv_tidy$term),]
indiv_sdr_sdFE_bv_tidy$age_group <- age_groups
indiv_sdr_sdFE_bv_tidy$treat <- "SDR"

did_table <- rbind(estimatr::tidy(indiv_sdr_sdFE_bv), estimatr::tidy(indiv_sdr_sdFE))
did_table[,c("estimate", "std.error", "statistic", "p.value")] <- round(did_table[,c("estimate", "std.error", "statistic", "p.value")], digits=2)

write.csv(did_table, "did_tables_indivlevel_statedecadeFEs_full.csv")





#SDR with state-time trends

indiv_sdr_timetrends <- lm_robust(voted ~ sdr*age_group18.24 + 
                                    sdr*age_group25.34 + 
                                    sdr*age_group35.44 + 
                                    sdr*age_group45.54 + 
                                    sdr*age_group55.64 + 
                                    factor(race) + sex + faminc + educ + 
                                    factor(year) + 
                                    factor(state)*year,
                                  clusters=state, se_type="stata",
                                  cps)

indiv_sdr_timetrends_tidy <- estimatr::tidy(indiv_sdr_timetrends)
indiv_sdr_timetrends_tidy$Model <- "Individual Level\n(controls, state-time trends)"
indiv_sdr_timetrends_tidy <- indiv_sdr_timetrends_tidy[grepl("sdr", indiv_sdr_timetrends_tidy$term),]
indiv_sdr_timetrends_tidy$age_group <- age_groups
indiv_sdr_timetrends_tidy$treat <- "SDR"

indiv_sdr_timetrends_bv <- lm_robust(voted ~ sdr*age_group18.24 + 
                                       sdr*age_group25.34 + 
                                       sdr*age_group35.44 + 
                                       sdr*age_group45.54 + 
                                       sdr*age_group55.64 + 
                                       factor(year) + 
                                       factor(state)*year, 
                                     clusters=state, se_type="stata",
                                     cps)

indiv_sdr_timetrends_bv_tidy <- estimatr::tidy(indiv_sdr_timetrends_bv)
indiv_sdr_timetrends_bv_tidy$Model <- "Individual Level\n(bivariate, state-time trends)"
indiv_sdr_timetrends_bv_tidy <- indiv_sdr_timetrends_bv_tidy[grepl("sdr", indiv_sdr_timetrends_bv_tidy$term),]
indiv_sdr_timetrends_bv_tidy$age_group <- age_groups
indiv_sdr_timetrends_bv_tidy$treat <- "SDR"

did_table <- rbind(estimatr::tidy(indiv_sdr_timetrends_bv), estimatr::tidy(indiv_sdr_timetrends))
did_table[,c("estimate", "std.error", "statistic", "p.value")] <- round(did_table[,c("estimate", "std.error", "statistic", "p.value")], digits=2)

write.csv(did_table, "did_tables_indivlevel_statetimetrends_full.csv")



#Early Voting (Narrow)
indiv_ev <- lm_robust(voted ~ early_voting_narrow*age_group18.24 + 
                        early_voting_narrow*age_group25.34 + 
                        early_voting_narrow*age_group35.44 + 
                        early_voting_narrow*age_group45.54 + 
                        early_voting_narrow*age_group55.64 + 
                        factor(race) + sex + faminc + educ + factor(year) + factor(state), 
                      clusters=state, se_type="stata",
                      cps)

indiv_ev_tidy <- estimatr::tidy(indiv_ev)
indiv_ev_tidy$Model <- "Individual Level\n(controls)"
indiv_ev_tidy <- indiv_ev_tidy[grepl("early_voting_narrow", indiv_ev_tidy$term),]
indiv_ev_tidy$age_group <- age_groups
indiv_ev_tidy$treat <- "Early Voting"


indiv_ev_bv <- lm_robust(voted ~ early_voting_narrow*age_group18.24 + 
                           early_voting_narrow*age_group25.34 + 
                           early_voting_narrow*age_group35.44 + 
                           early_voting_narrow*age_group45.54 + 
                           early_voting_narrow*age_group55.64 + 
                           factor(year) + factor(state), 
                         clusters=state, se_type="stata",
                         cps)

indiv_ev_bv_tidy <- estimatr::tidy(indiv_ev_bv)
indiv_ev_bv_tidy$Model <- "Individual Level\n(bivariate)"
indiv_ev_bv_tidy <- indiv_ev_bv_tidy[grepl("early_voting_narrow", indiv_ev_bv_tidy$term),]
indiv_ev_bv_tidy$age_group <- age_groups
indiv_ev_bv_tidy$treat <- "Early Voting"

gc()

plotdata <- rbind.fill(indiv_sdr_tidy, indiv_sdr_bv_tidy, indiv_ev_tidy, indiv_ev_bv_tidy, 
                       indiv_sdr_sdFE_tidy, indiv_sdr_sdFE_bv_tidy, indiv_sdr_timetrends_tidy, indiv_sdr_timetrends_bv_tidy)

plotdata$aggregate <- "Individual\nLevel"

#summing for marginal effects
for(i in levels(factor(plotdata$Model))){
  
  for(j in levels(factor(plotdata$treat))){
    
    plotdata$estimate[plotdata$Model==i & plotdata$treat==j &
                        grepl(":", plotdata$term)] <-
      plotdata$estimate[plotdata$Model==i & plotdata$treat==j &
                          grepl(":", plotdata$term)] +
      plotdata$estimate[plotdata$Model==i & plotdata$treat==j &
                          (plotdata$term=="sdr"|plotdata$term=="early_voting_narrow")]
    
    plotdata$std.error[plotdata$Model==i & plotdata$treat==j & 
                         (plotdata$term=="sdr"|plotdata$term=="early_voting_narrow")] <-
      plotdata$std.error[plotdata$Model==i & plotdata$treat==j & 
                           grepl(":", plotdata$term)][2]
    
  }
  
}


plotdata[,c("estimate","std.error")] <- 100*plotdata[,c("estimate","std.error")] 
plotdata$cilow <- plotdata$estimate-(1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate+(1.96*plotdata$std.error)


plotdata <- rbind.fill(plotdata, read.csv("diffindiff_estimates_cps5.csv", stringsAsFactors = F))

plotdata$treat[grepl("Same", plotdata$term)] <- "SDR"
plotdata$treat[grepl("Early", plotdata$term)] <- "Early Voting"
plotdata$age_group[!is.na(plotdata$group) & grepl("fixed", plotdata$group)==F] <- 
  plotdata$group[!is.na(plotdata$group) & grepl("fixed", plotdata$group)==F]

plotdata$age_group[grepl("65", plotdata$age_group)] <- "65+"
plotdata$age_group[grepl("18", plotdata$age_group)] <- "18\n-\n24"
plotdata$age_group[grepl("25", plotdata$age_group)] <- "25\n-\n34"
plotdata$age_group[grepl("35", plotdata$age_group)] <- "35\n-\n44"
plotdata$age_group[grepl("45", plotdata$age_group)] <- "45\n-\n54"
plotdata$age_group[grepl("55", plotdata$age_group)] <- "55\n-\n64"

plotdata$aggregate[is.na(plotdata$aggregate)] <- "Aggregated\nby State"

plotdata$Model[plotdata$Model=="OLS" & plotdata$aggregate=="Aggregated\nby State"] <- "State Level\n(bivariate)"
plotdata$Model[plotdata$Model=="OLS (controls)" & plotdata$aggregate=="Aggregated\nby State"] <- "State Level\n(controls)"

plotdata$Model[plotdata$Model=="WFE"] <- "State Level WFE"

write.csv(plotdata, "diffindiff_estimate_all_20.6.10.csv", na="")

pd <- position_dodge(.5)

est_ranges <- plotdata[plotdata$treat=="SDR" &
                         grepl("MLM", plotdata$Model)==F & plotdata$age_group!="18\n-\n24",]

pdf("diffindiff_cps_RR4.pdf", h=7, w=9)
ggplot(plotdata[plotdata$treat=="SDR" &
                  grepl("MLM", plotdata$Model)==F,], aes(x=age_group, y=estimate)) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=2.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, size=1, position=pd) +
  facet_wrap(~Model) +
  #scale_y_continuous(breaks=c(0,5,10,15), limits=c(0,15)) +
  #scale_x_discrete(breaks=c(0,1)) +
  #scale_color_manual(values=c("black", "black", "grey"), guide=F) +
  geom_hline(yintercept=0, linetype=2) +
  #scale_x_discrete(name="", limits = rev(levels(factor(plotdata$age_group)))) +
  #coord_flip() +
  ylab("Effect on Turnout") +
  xlab("Age Group") +
  theme_bw()
dev.off()


####Replicating in Stata
setwd("..")
write.dta(cps[!is.na(cps$voted),], "cps_clean_stata.dta", convert.factors = "string")
setwd("./Output")


#Alternative covariate coding (Appendix Tables A15 and A16)

alt_cov <- lm_robust(voted ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       factor(race) + sex + factor(faminc) + factor(educ) + factor(year) + factor(state),
                     clusters=state, se_type="stata",
                     cps)

alt_cov_tidy <- estimatr::tidy(alt_cov)
alt_cov_tidy$Model <- "Individual Level\n(controls)"
alt_cov_tidy$treat <- "SDR"


alt_cov2 <- lm_robust(voted ~ sdr*age_group18.24 + 
                        sdr*age_group25.34 + 
                        sdr*age_group35.44 + 
                        sdr*age_group45.54 + 
                        sdr*age_group55.64 + 
                        factor(race) + sex + factor(faminc) + factor(educ) + factor(year) + factor(state)*year,
                      clusters=state, se_type="stata",
                      cps)

alt_cov2_tidy <- estimatr::tidy(alt_cov2)
alt_cov2_tidy$Model <- "Individual Level\n(controls, state-time trends)"
alt_cov2_tidy$treat <- "SDR"

plot_alt <- rbind(alt_cov_tidy, alt_cov2_tidy)
plot_alt <- plot_alt[grepl("state", plot_alt$term)==F & grepl("year", plot_alt$term)==F,]

write.csv(plot_alt, "DiD_alt_covs.csv", row.names = F)




##########Individual Level DiD POTUS nonPOTUS (Figure 4)###############



ols_4 <- lm(voted ~ sdr*age_group18.24*presidential_year + 
              sdr*age_group25.34*presidential_year + 
              sdr*age_group35.44*presidential_year + 
              sdr*age_group45.54*presidential_year + 
              sdr*age_group55.64*presidential_year + 
              factor(race) + sex + faminc + educ + factor(year) + factor(state), 
            cps)
clusters_4 <- as.character(expand.model.frame(ols_4, ~st, na.expand = T)$st)
fit <- commarobust(ols_4, clusters=clusters_4, se_type = "stata")

write.csv(estimatr::tidy(fit), "potus_nonpotus_full_int.csv", row.names = F)




#OLS
ols_1 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                presidential_year + factor(year) + factor(state), 
              cps[cps$presidential_year==1,])


ols_2 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + educ + presidential_year + factor(year) + factor(state), 
              cps[cps$presidential_year==1,])

ols_3 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                presidential_year + factor(year) + factor(state), 
              cps[cps$presidential_year==0,])


ols_4 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + educ + presidential_year + factor(year) + factor(state), 
              cps[cps$presidential_year==0,])

clusters_1 <- as.character(expand.model.frame(ols_1, ~st, na.expand = T)$st)
clusters_2 <- as.character(expand.model.frame(ols_2, ~st, na.expand = T)$st)
clusters_3 <- as.character(expand.model.frame(ols_3, ~st, na.expand = T)$st)
clusters_4 <- as.character(expand.model.frame(ols_4, ~st, na.expand = T)$st)

fit_1_r <- commarobust(ols_1, clusters=clusters_1, se_type = "stata")
fit_2_r <- commarobust(ols_2, clusters=clusters_2, se_type = "stata")
fit_3_r <- commarobust(ols_3, clusters=clusters_3, se_type = "stata")
fit_4_r <- commarobust(ols_4, clusters=clusters_4, se_type = "stata")


stargazer(ols_1, ols_2, ols_3, ols_4, se = starprep(fit_1_r, fit_2_r, fit_4_r, fit_4_r, clusters = cps$st), 
          out="POTUS_nonPOTUS_DiD_individual_cps_ols_age.html", type="html", 
          omit = c("year","race", "state"),
          omit.labels = c("Race Dummies", "Year FEs", "State FEs"),
          star.cutoffs = c(0.05,0.01,0.001))

rm(list=ls(pattern="logit_"))
rm(list=ls(pattern="ols_")); gc()


###Plot

model_1 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + educ + factor(year) + factor(state),
              cps[cps$presidential_year==1,])

ols_1 <- tidy(model_1)

clusters_1 <- as.character(expand.model.frame(model_1, ~st, na.expand = T)$st)
ols_1$std.error <- commarobust(model_1, clusters = clusters_1, se_type="stata")$std.error

rm(model_1, clusters_1); gc()


model_2 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + educ + factor(year) + factor(state),
              cps[cps$presidential_year==0,])
ols_2 <- tidy(model_2)

clusters_2 <- as.character(expand.model.frame(model_2, ~st, na.expand = T)$st)
ols_2$std.error <- commarobust(model_2, clusters = clusters_2, se_type="stata")$std.error

rm(model_2, clusters_2); gc()

ols_1$presidential_year <- 1
ols_2$presidential_year <- 0

ols_1$estimate[grepl("sdr:", ols_1$term)] <- ols_1$estimate[grepl("sdr:", ols_1$term)] + ols_1$estimate[ols_1$term=="sdr"]
ols_2$estimate[grepl("sdr:", ols_2$term)] <- ols_2$estimate[grepl("sdr:", ols_2$term)] + ols_2$estimate[ols_2$term=="sdr"]

plotdata <- rbind(ols_1[grepl("sdr", ols_1$term),], ols_2[grepl("sdr", ols_2$term),])
plotdata$age_group <- rep(c("65+", "18-24","25-34","35-44","45-54","55-64"), 2)

plotdata[,c("estimate","std.error")] <- 100*plotdata[,c("estimate","std.error")]

plotdata$cihigh <- plotdata$estimate + 1.96*plotdata$std.error
plotdata$cilow <- plotdata$estimate - 1.96*plotdata$std.error

write.csv(plotdata, "POTUS_nonPOTUS_individual_DiD_results.csv", na="")

pd <- position_dodge(.5)

pdf("mfx_presidential_elections_DiD_controls.pdf", h=4, w=6)
ggplot(plotdata, aes(x=age_group, y=estimate, color=factor(presidential_year))) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=2.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, size=1, position=pd) +
  #facet_wrap(~term) +
  #scale_y_continuous(breaks=c(0,5,10,15), limits=c(0,15)) +
  #scale_x_discrete(breaks=c(0,1)) +
  scale_color_manual(values=c("grey", "black"), name="Presidential\nElection") +
  geom_hline(yintercept=0, linetype=2) +
  coord_flip() +
  ylab("Effect on Turnout") +
  xlab("Age Group") +
  theme_bw()
dev.off()




#Bivariate DiD POTUS non-POTUS

model_1 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(year) + factor(state),
              cps[cps$presidential_year==1,])

ols_1 <- tidy(model_1)

clusters_1 <- as.character(expand.model.frame(model_1, ~st, na.expand = T)$st)
ols_1$std.error <- commarobust(model_1, clusters = clusters_1, se_type="stata")$std.error

rm(model_1, clusters_1); gc()


model_2 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(year) + factor(state),
              cps[cps$presidential_year==0,])
ols_2 <- tidy(model_2)

clusters_2 <- as.character(expand.model.frame(model_2, ~st, na.expand = T)$st)
ols_2$std.error <- commarobust(model_2, clusters = clusters_2, se_type="stata")$std.error

rm(model_2, clusters_2); gc()

ols_1$presidential_year <- 1
ols_2$presidential_year <- 0

ols_1$estimate[grepl("sdr:", ols_1$term)] <- ols_1$estimate[grepl("sdr:", ols_1$term)] + ols_1$estimate[ols_1$term=="sdr"]
ols_2$estimate[grepl("sdr:", ols_2$term)] <- ols_2$estimate[grepl("sdr:", ols_2$term)] + ols_2$estimate[ols_2$term=="sdr"]

plotdata <- rbind(ols_1[grepl("sdr", ols_1$term),], ols_2[grepl("sdr", ols_2$term),])
plotdata$age_group <- rep(c("65+", "18-24","25-34","35-44","45-54","55-64"), 2)

plotdata[,c("estimate","std.error")] <- 100*plotdata[,c("estimate","std.error")]

plotdata$cihigh <- plotdata$estimate + 1.96*plotdata$std.error
plotdata$cilow <- plotdata$estimate - 1.96*plotdata$std.error

write.csv(plotdata, "POTUS_nonPOTUS_individual_DiD_results_bv.csv", na="")

pd <- position_dodge(.5)


pdf("mfx_presidential_elections_DiD_bv.pdf", h=4, w=6)
ggplot(plotdata, aes(x=age_group, y=estimate, color=factor(presidential_year))) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=2.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, size=1, position=pd) +
  #facet_wrap(~model) +
  scale_y_continuous(breaks=c(0,5,10), limits=c(min(plotdata$cilow), 10)) +
  #scale_x_discrete(breaks=c(0,1)) +
  scale_color_manual(values=c("grey", "black"), name="Presidential\nElection") +
  geom_hline(yintercept=0, linetype=2) +
  coord_flip() +
  ylab("Effect on Turnout") +
  xlab("Age Group") +
  theme_bw()
dev.off()




##########################Mechanism: Registration (Figure 5)################################

###Plot
gc()

model_1 <- lm_robust(reg_pollingplace ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       factor(state) + factor(year), 
                     clusters = st, se_type="stata",
                     cps)

model_2 <- lm_robust(reg_pollingplace ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       factor(race) + sex + faminc + educ + 
                       factor(state) + factor(year), 
                     clusters = st, se_type="stata",
                     cps)

ols_1 <- estimatr::tidy(model_1)
ols_2 <- estimatr::tidy(model_2)

ols_1$Model <- "Bivariate"
ols_2$Model <- "Controls"

plotdata <- rbind(ols_1, ols_2)


plotdata <- plotdata[grepl("sdr", plotdata$term),]

plotdata$age_group[grepl("*:", plotdata$term)==F] <- "65+"
plotdata$age_group[grepl("18", plotdata$term)] <- "18\n-\n24"
plotdata$age_group[grepl("25", plotdata$term)] <- "25\n-\n34"
plotdata$age_group[grepl("35", plotdata$term)] <- "35\n-\n44"
plotdata$age_group[grepl("45", plotdata$term)] <- "45\n-\n54"
plotdata$age_group[grepl("55", plotdata$term)] <- "55\n-\n64"

plotdata[,c("estimate","std.error")] <- sapply(plotdata[,c("estimate","std.error")], function(x) as.numeric(as.character(x)))
plotdata[,c("term","age_group")] <- sapply(plotdata[,c("term","age_group")], as.character)

#summing interaction and constituent terms
for(i in levels(factor(plotdata$Model))){
  
  plotdata$estimate[grepl(i, plotdata$Model) & plotdata$age_group!="65+"] <-
    plotdata$estimate[grepl(i, plotdata$Model) & plotdata$age_group!="65+"] +
    plotdata$estimate[grepl(i, plotdata$Model) & plotdata$age_group=="65+"]
  
}

plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)

pdf("did_regpollingplace_plot.pdf", h=3, w=4)
ggplot(plotdata, aes(x=age_group, y=estimate)) +
  geom_point(size=1) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0) +
  facet_wrap(~Model) +
  geom_hline(yintercept=0, linetype=2) +
  #scale_y_continuous(breaks=c(-.05, 0, .05, .1), limits=c(-.05, .1)) +
  xlab("Age") +
  ylab("Effect on Pr(Reg. at Polling Place)") +
  theme_bw()
dev.off()




#################Residential Moving Mechanism####################

pdf("moving_by_age.pdf", h=3, w=3)
ggplot(cps, aes(x=age, y=movedless1yr)) +
  geom_smooth(color="black") +
  #geom_point(alpha=.05) +
  xlab("Age") +
  ylab("Pr(Moved in Last Year)") +
  scale_y_continuous(breaks=c(0,.25,.5)) +
  coord_cartesian(ylim=c(0,.5)) +
  theme_classic()
dev.off()

plotdata <- rbind(tidy(lm(voted ~ movedless1yr + factor(state) +
                                             factor(year), 
                                           cps))[2,],
                  tidy(lm(voted ~ movedless1yr +
                                             age_group18.24 + 
                                             age_group25.34 + 
                                             age_group35.44 + 
                                             age_group45.54 + 
                                             age_group55.64 + 
                                             factor(race) + factor(sex) + faminc + educ + 
                            presidential_year + factor(state) + factor(year), cps))[2,])


plotdata[,c("estimate","std.error")] <- 100*plotdata[,c("estimate","std.error")]

plotdata$cilow <- plotdata$estimate-1.96*plotdata$std.error
plotdata$cihigh <- plotdata$estimate+1.96*plotdata$std.error

plotdata$Model[1] <- "Bivariate"
plotdata$Model[2] <- "Controls"

pdf("moving_turnout.pdf", h=3, w=3)
ggplot(plotdata, aes(x=Model, y=estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0) +
  geom_hline(yintercept = 0, linetype=2) +
  ylab("Estimate") +
  theme_classic()
dev.off()





################Population Back-of-the-Envelope Estimates#####################


did_results <- read.csv("diffindiff_estimate_all_20.6.10.csv", na.strings="")
did_results <- did_results[(did_results$Model=="Individual Level\n(controls)"|
                              did_results$Model=="Individual Level\n(bivariate)"|
                              did_results$Model=="State Level\n(bivariate)"|
                              did_results$Model=="State Level\n(controls)") &
                             did_results$treat=="SDR",]
did_results <- did_results[!is.na(did_results$estimate),]


logit_2 <- glm(voted ~ sdr*age_group18.24 + 
                 sdr*age_group25.34 + 
                 sdr*age_group35.44 + 
                 sdr*age_group45.54 + 
                 sdr*age_group55.64 + 
                 factor(race) + sex + faminc + educ + presidential_year + factor(year), 
               cps, family=binomial(link="logit"))

setwd("..")
pop_proportions <- read.csv("age_pop_proportions_census2010.csv")
setwd("./Output")

gc()

sim_data <- cps[1:6,]

sim_data[,grepl("age_group", names(sim_data))] <- 0
sim_data$faminc <- mean(cps$faminc, na.rm=T)
sim_data$sex <- 1
sim_data$sdr <- 0
sim_data$year <- 2014
sim_data$educ <- mean(cps$educ, na.rm=T)

sim_data$age_group <- pop_proportions$age_group

for(i in names(sim_data)[grepl("age_group", names(sim_data))][2:7]){
  
  sim_data[which(names(sim_data)[grepl("age_group", names(sim_data))][2:7]==i),i] <- 1
  
}

sim_data <- rbind(sim_data, sim_data)
sim_data$sdr[7:12] <- 1

preds <- data.frame(predict(logit_2, newdata=sim_data, se.fit=T, type="response"))
preds$age_group <- rep(pop_proportions$age_group, 2)


preds$fit_wtd <- preds$fit * rep(pop_proportions$proportion_100,2)

preds$fit_prop_electorate[1:6] <- preds$fit_wtd[1:6]/sum(preds$fit_wtd[1:6])
preds$fit_prop_electorate[7:12] <- preds$fit_wtd[7:12]/sum(preds$fit_wtd[7:12])
preds$sdr[1:6] <- "No SDR"
preds$sdr[7:12] <- "SDR"

plotdata <- preds[1:6,]
plotdata$fit_prop_electorate <- (preds$fit_prop_electorate[7:12] - plotdata$fit_prop_electorate)*100
plotdata$se.fit <- plotdata$se.fit*100

plotdata$cilow <- plotdata$fit_prop_electorate - 1.96*plotdata$se.fit
plotdata$cihigh <- plotdata$fit_prop_electorate + 1.96*plotdata$se.fit


newdata <- model.frame(voted ~ sdr*age_group18.24 + 
                         sdr*age_group25.34 + 
                         sdr*age_group35.44 + 
                         sdr*age_group45.54 + 
                         sdr*age_group55.64 + 
                         factor(race) + sex + faminc + educ + presidential_year + factor(year), data=cps)

newdata <- join(newdata, cps[1:100,grepl("age_group", names(cps))], match="first")

newdata$sdr <- 0
names(newdata)[c(8,13)] <- c("race","year")

newdata$preds_nosdr <- predict(logit_2, newdata=newdata, se.fit=F, type="response")

newdata$sdr <- 1
newdata$preds_sdr <- predict(logit_2, newdata=newdata, se.fit=F, type="response")

agg <- aggregate(cbind(preds_nosdr, preds_sdr)~age_group, newdata, mean, na.rm=T)
agg$model <- "Logit"

agg$std.error <- 100*preds$se.fit[!is.na(preds$se.fit)][7:12]

write.csv(agg, "mfx_predicted_turnout_cps_logit.csv")



did_results <- did_results[order(did_results$Model, did_results$age_group),]

plotdata <- agg
plotdata <- rbind(plotdata, plotdata, plotdata)
plotdata$model[7:12] <- "Diff-in-Diff"
plotdata$model[13:18] <- "Diff-in-Diff w/ controls"

plotdata$preds_sdr[7:12] <- plotdata$preds_nosdr[7:12] + (did_results$estimate[did_results$Model=="State Level\n(controls)"]/100)
plotdata$preds_sdr[13:18] <- plotdata$preds_nosdr[13:18] + (did_results$estimate[did_results$Model=="Individual Level\n(controls)"]/100)

plotdata$std.error[7:12] <- did_results$std.error[did_results$Model=="Individual Level\n(bivariate)"]
plotdata$std.error[13:18] <- did_results$std.error[did_results$Model=="Individual Level\n(controls)"]

for(i in levels(factor(plotdata$model))){
  
  plotdata[plotdata$model==i,c("preds_nosdr","preds_sdr")] <- 
    (plotdata[plotdata$model==i,c("preds_nosdr","preds_sdr")]*pop_proportions$proportion_100)
  
  plotdata$preds_nosdr[plotdata$model==i] <- 100*plotdata$preds_nosdr[plotdata$model==i]/sum(plotdata$preds_nosdr[plotdata$model==i])
  plotdata$preds_sdr[plotdata$model==i] <- 100*plotdata$preds_sdr[plotdata$model==i]/sum(plotdata$preds_sdr[plotdata$model==i])
  
  plotdata$diff[plotdata$model==i] <- plotdata$preds_sdr[plotdata$model==i]-plotdata$preds_nosdr[plotdata$model==i]
  plotdata$cilow[plotdata$model==i] <- plotdata$diff[plotdata$model==i] - (1.96*plotdata$std.error[plotdata$model==i]/4)
  plotdata$cihigh[plotdata$model==i] <- plotdata$diff[plotdata$model==i] + (1.96*plotdata$std.error[plotdata$model==i]/4)
  
}

plotdata$Model <- plotdata$model

pd <- position_dodge(.5)

pdf("mfx_pop_proportions.pdf", h=4, w=6)
ggplot(plotdata[plotdata$Model!="Logit",], aes(x=age_group, y=diff, shape=Model)) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=2.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, position=pd) +
  #scale_color_manual(values=c("grey", "grey", "black")) +
  #facet_wrap(~term) +
  #scale_y_continuous(breaks=c(0,5,10,15), limits=c(0,15)) +
  #scale_y_continuous(breaks=c(-1, -.5, 0, .5, 1)) +
  #scale_shape_manual(values=c(16,17,15), name="Model") +
  geom_hline(yintercept=0, linetype=2) +
  coord_flip() +
  ylab("Change in Percentage of Electorate") +
  xlab("Age Group") +
  theme_bw()
dev.off()

write.csv(plotdata, "mfx_pop_proportions.csv")






########################################################################
########################################################################
#####################SUPPLEMENTAL MATERIALS (APPENDIX)##################
########################################################################
########################################################################



###Appendix: Alternative DV (excludes "refused to answer")
gc()

lm_1 <- lm_robust(voted_alt ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       factor(state) + factor(year), 
                     cps, cluster=st, se_type="stata")

lm_2 <- lm_robust(voted_alt ~ sdr*age_group18.24 + 
                 sdr*age_group25.34 + 
                 sdr*age_group35.44 + 
                 sdr*age_group45.54 + 
                 sdr*age_group55.64 + 
                 factor(race) + sex + faminc + educ + presidential_year + factor(state) + factor(year), 
               cps, cluster=st, se_type="stata")

age_groups <- c("65+", "18\n-\n24","25\n-\n34","35\n-\n44","45\n-\n54","55\n-\n64")

lm_1_tidy <- estimatr::tidy(lm_1)
lm_1_tidy$Model <- "Bivariate"
lm_1_tidy <- lm_1_tidy[grepl("sdr", lm_1_tidy$term),]
lm_1_tidy$age_group <- age_groups
lm_1_tidy$treat <- "SDR"

lm_2_tidy <- estimatr::tidy(lm_2)
lm_2_tidy$Model <- "Controls"
lm_2_tidy <- lm_2_tidy[grepl("sdr", lm_2_tidy$term),]
lm_2_tidy$age_group <- age_groups
lm_2_tidy$treat <- "SDR"

plotdata <- rbind(lm_1_tidy, lm_2_tidy)

for(i in c("sdr")){
  
  plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+" & plotdata$Model=="Bivariate"] <-
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+" & plotdata$Model=="Bivariate"] +
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group=="65+" & plotdata$Model=="Bivariate"]
  
  plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+" & plotdata$Model=="Controls"] <-
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+" & plotdata$Model=="Controls"] +
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group=="65+" & plotdata$Model=="Controls"]
}

plotdata$cihigh <- plotdata$estimate + 1.96*plotdata$std.error
plotdata$cilow <- plotdata$estimate - 1.96*plotdata$std.error

pd <- position_dodge(.3)

pdf("DiD_cps_altdv_plot.pdf", h=4, w=6)
ggplot(plotdata, aes(x=age_group, y=estimate, color=Model)) +
  geom_point(size=1, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, position=pd) +
  #facet_wrap(~policy) +
  geom_hline(yintercept=0, linetype=2) +
  scale_y_continuous(breaks=c(-.05, 0, .05)) +
  coord_cartesian(ylim=c(-0.075,0.075)) +
  scale_color_manual(values=c("grey", "black")) +
  xlab("Age") +
  ylab("SDR Turnout Effect") +
  theme_bw()
dev.off()


  
  
  
stargazer(lm_1, lm_2, se = starprep(lm_1, lm_2, clusters = cps$st),  
          out="DiD_cps_age_altdv.html", type="html", 
          omit = c("year","race"),
          omit.labels = c("Race Dummies", "State FEs", "Year FEs"),
          star.cutoffs = c(0.05,0.01,0.001))

rm(list=ls(pattern="logit_"))
rm(list=ls(pattern="ols_")); gc()





################All Voting Laws In One###############

##############Plot (Figure A7)

#Preferred EV coding

model_1 <- estimatr::tidy(lm_robust(voted ~ sdr*age_group18.24 + 
                                      sdr*age_group25.34 + 
                                      sdr*age_group35.44 + 
                                      sdr*age_group45.54 + 
                                      sdr*age_group55.64 + 
                                      early_voting_narrow*age_group18.24 + 
                                      early_voting_narrow*age_group25.34 + 
                                      early_voting_narrow*age_group35.44 + 
                                      early_voting_narrow*age_group45.54 + 
                                      early_voting_narrow*age_group55.64 +
                                      anyid2*age_group18.24 + 
                                      anyid2*age_group25.34 + 
                                      anyid2*age_group35.44 + 
                                      anyid2*age_group45.54 + 
                                      anyid2*age_group55.64 +
                                      anyphotoid2*age_group18.24 + 
                                      anyphotoid2*age_group25.34 + 
                                      anyphotoid2*age_group35.44 + 
                                      anyphotoid2*age_group45.54 + 
                                      anyphotoid2*age_group55.64 +
                                      #factor(race) + sex + faminc + educ + presidential_year + 
                                      factor(state) + factor(year), 
                                    cps, clusters=st, se_type="stata"))

model_2 <- estimatr::tidy(lm_robust(voted ~ sdr*age_group18.24 + 
                                      sdr*age_group25.34 + 
                                      sdr*age_group35.44 + 
                                      sdr*age_group45.54 + 
                                      sdr*age_group55.64 + 
                                      early_voting_narrow*age_group18.24 + 
                                      early_voting_narrow*age_group25.34 + 
                                      early_voting_narrow*age_group35.44 + 
                                      early_voting_narrow*age_group45.54 + 
                                      early_voting_narrow*age_group55.64 +
                                      anyid2*age_group18.24 + 
                                      anyid2*age_group25.34 + 
                                      anyid2*age_group35.44 + 
                                      anyid2*age_group45.54 + 
                                      anyid2*age_group55.64 +
                                      anyphotoid2*age_group18.24 + 
                                      anyphotoid2*age_group25.34 + 
                                      anyphotoid2*age_group35.44 + 
                                      anyphotoid2*age_group45.54 + 
                                      anyphotoid2*age_group55.64 +
                                      factor(race) + sex + faminc + educ + presidential_year + 
                                      factor(state) + factor(year), 
                                    cps, clusters=st, se_type="stata"))

model_1$Model <- "Individual Level\nBivariate"
model_2$Model <- "Individual Level\nControls"

plotdata <- rbind(model_1, model_2)

plotdata$policy[grepl("sdr", plotdata$term)] <- "SDR"
plotdata$policy[grepl("early_voting", plotdata$term)] <- "Early Voting"
plotdata$policy[grepl("anyid2", plotdata$term)] <- "Non-Strict Voter ID"
plotdata$policy[grepl("photo", plotdata$term)] <- "Strict Voter ID"
plotdata$policy[grepl("absvot", plotdata$term)] <- "No Fault Absentee"

plotdata <- plotdata[!is.na(plotdata$policy),]

plotdata$age_group[grepl("*:", plotdata$term)==F] <- "65+"
plotdata$age_group[grepl("18", plotdata$term)] <- "18\n-\n24"
plotdata$age_group[grepl("25", plotdata$term)] <- "25\n-\n34"
plotdata$age_group[grepl("35", plotdata$term)] <- "35\n-\n44"
plotdata$age_group[grepl("45", plotdata$term)] <- "45\n-\n54"
plotdata$age_group[grepl("55", plotdata$term)] <- "55\n-\n64"

#plotdata[,c("estimate","std.error")] <- sapply(plotdata[,c("estimate","std.error")], function(x) as.numeric(as.character(x)))
#plotdata[,c("term","age_group")] <- sapply(plotdata[,c("term","age_group")], as.character)

for(i in c("sdr","early_voting_narrow","anyid2","anyphotoid2")){
  
  plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+"] <-
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+"] +
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group=="65+"]
  
}

plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)

pd <- position_dodge(.3)

pdf("DiD_alllaws_plot_new_narrow.pdf", h=4, w=6)
ggplot(plotdata, aes(x=age_group, y=estimate, color=Model)) +
  geom_point(size=1, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, position=pd) +
  facet_wrap(~policy) +
  geom_hline(yintercept=0, linetype=2) +
  scale_y_continuous(breaks=c(-.05, 0, .05)) +
  coord_cartesian(ylim=c(-0.075,0.075)) +
  scale_color_manual(values=c("grey", "black")) +
  xlab("Age") +
  ylab("Turnout Effect") +
  theme_bw()
dev.off()


#Broad EV coding:
gc()

model_1 <- estimatr::tidy(lm_robust(voted ~ sdr*age_group18.24 + 
                                      sdr*age_group25.34 + 
                                      sdr*age_group35.44 + 
                                      sdr*age_group45.54 + 
                                      sdr*age_group55.64 + 
                                      early_voting_broad*age_group18.24 + 
                                      early_voting_broad*age_group25.34 + 
                                      early_voting_broad*age_group35.44 + 
                                      early_voting_broad*age_group45.54 + 
                                      early_voting_broad*age_group55.64 +
                                      anyid2*age_group18.24 + 
                                      anyid2*age_group25.34 + 
                                      anyid2*age_group35.44 + 
                                      anyid2*age_group45.54 + 
                                      anyid2*age_group55.64 +
                                      anyphotoid2*age_group18.24 + 
                                      anyphotoid2*age_group25.34 + 
                                      anyphotoid2*age_group35.44 + 
                                      anyphotoid2*age_group45.54 + 
                                      anyphotoid2*age_group55.64 +
                                      #factor(race) + sex + faminc + educ + presidential_year + 
                                      factor(state) + factor(year), 
                                    cps, clusters=st, se_type="stata"))

model_2 <- estimatr::tidy(lm_robust(voted ~ sdr*age_group18.24 + 
                                      sdr*age_group25.34 + 
                                      sdr*age_group35.44 + 
                                      sdr*age_group45.54 + 
                                      sdr*age_group55.64 + 
                                      early_voting_broad*age_group18.24 + 
                                      early_voting_broad*age_group25.34 + 
                                      early_voting_broad*age_group35.44 + 
                                      early_voting_broad*age_group45.54 + 
                                      early_voting_broad*age_group55.64 +
                                      anyid2*age_group18.24 + 
                                      anyid2*age_group25.34 + 
                                      anyid2*age_group35.44 + 
                                      anyid2*age_group45.54 + 
                                      anyid2*age_group55.64 +
                                      anyphotoid2*age_group18.24 + 
                                      anyphotoid2*age_group25.34 + 
                                      anyphotoid2*age_group35.44 + 
                                      anyphotoid2*age_group45.54 + 
                                      anyphotoid2*age_group55.64 +
                                      factor(race) + sex + faminc + educ + presidential_year + 
                                      factor(state) + factor(year), 
                                    cps, clusters=st, se_type="stata"))

model_1$Model <- "Individual Level\nBivariate"
model_2$Model <- "Individual Level\nControls"

plotdata <- rbind(model_1, model_2)

plotdata$policy[grepl("sdr", plotdata$term)] <- "SDR"
plotdata$policy[grepl("early_voting", plotdata$term)] <- "Early Voting"
plotdata$policy[grepl("anyid2", plotdata$term)] <- "Non-Strict Voter ID"
plotdata$policy[grepl("photo", plotdata$term)] <- "Strict Voter ID"
plotdata$policy[grepl("absvot", plotdata$term)] <- "No Fault Absentee"

plotdata <- plotdata[!is.na(plotdata$policy),]

plotdata$age_group[grepl("*:", plotdata$term)==F] <- "65+"
plotdata$age_group[grepl("18", plotdata$term)] <- "18\n-\n24"
plotdata$age_group[grepl("25", plotdata$term)] <- "25\n-\n34"
plotdata$age_group[grepl("35", plotdata$term)] <- "35\n-\n44"
plotdata$age_group[grepl("45", plotdata$term)] <- "45\n-\n54"
plotdata$age_group[grepl("55", plotdata$term)] <- "55\n-\n64"

#plotdata[,c("estimate","std.error")] <- sapply(plotdata[,c("estimate","std.error")], function(x) as.numeric(as.character(x)))
#plotdata[,c("term","age_group")] <- sapply(plotdata[,c("term","age_group")], as.character)

for(i in c("sdr","early_voting_broad","anyid2","anyphotoid2")){
  
  plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+"] <-
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+"] +
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group=="65+"]
  
}

plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)

pd <- position_dodge(.3)

pdf("DiD_alllaws_plot_new_broad.pdf", h=4, w=6)
ggplot(plotdata, aes(x=age_group, y=estimate, color=Model)) +
  geom_point(size=1, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, position=pd) +
  facet_wrap(~policy) +
  geom_hline(yintercept=0, linetype=2) +
  scale_y_continuous(breaks=c(-.05, 0, .05)) +
  coord_cartesian(ylim=c(-0.075,0.075)) +
  scale_color_manual(values=c("grey", "black")) +
  xlab("Age") +
  ylab("Turnout Effect") +
  theme_bw()
dev.off()



#In Person EV coding:
gc()

model_1 <- estimatr::tidy(lm_robust(voted ~ sdr*age_group18.24 + 
                                      sdr*age_group25.34 + 
                                      sdr*age_group35.44 + 
                                      sdr*age_group45.54 + 
                                      sdr*age_group55.64 + 
                                      early_voting_person*age_group18.24 + 
                                      early_voting_person*age_group25.34 + 
                                      early_voting_person*age_group35.44 + 
                                      early_voting_person*age_group45.54 + 
                                      early_voting_person*age_group55.64 +
                                      anyid2*age_group18.24 + 
                                      anyid2*age_group25.34 + 
                                      anyid2*age_group35.44 + 
                                      anyid2*age_group45.54 + 
                                      anyid2*age_group55.64 +
                                      anyphotoid2*age_group18.24 + 
                                      anyphotoid2*age_group25.34 + 
                                      anyphotoid2*age_group35.44 + 
                                      anyphotoid2*age_group45.54 + 
                                      anyphotoid2*age_group55.64 +
                                      #factor(race) + sex + faminc + educ + presidential_year + 
                                      factor(state) + factor(year), 
                                    cps, clusters=st, se_type="stata"))

model_2 <- estimatr::tidy(lm_robust(voted ~ sdr*age_group18.24 + 
                                      sdr*age_group25.34 + 
                                      sdr*age_group35.44 + 
                                      sdr*age_group45.54 + 
                                      sdr*age_group55.64 + 
                                      early_voting_person*age_group18.24 + 
                                      early_voting_person*age_group25.34 + 
                                      early_voting_person*age_group35.44 + 
                                      early_voting_person*age_group45.54 + 
                                      early_voting_person*age_group55.64 +
                                      anyid2*age_group18.24 + 
                                      anyid2*age_group25.34 + 
                                      anyid2*age_group35.44 + 
                                      anyid2*age_group45.54 + 
                                      anyid2*age_group55.64 +
                                      anyphotoid2*age_group18.24 + 
                                      anyphotoid2*age_group25.34 + 
                                      anyphotoid2*age_group35.44 + 
                                      anyphotoid2*age_group45.54 + 
                                      anyphotoid2*age_group55.64 +
                                      factor(race) + sex + faminc + educ + presidential_year + 
                                      factor(state) + factor(year), 
                                    cps, clusters=st, se_type="stata"))

model_1$Model <- "Individual Level\nBivariate"
model_2$Model <- "Individual Level\nControls"

plotdata <- rbind(model_1, model_2)

plotdata$policy[grepl("sdr", plotdata$term)] <- "SDR"
plotdata$policy[grepl("early_voting", plotdata$term)] <- "Early Voting"
plotdata$policy[grepl("anyid2", plotdata$term)] <- "Non-Strict Voter ID"
plotdata$policy[grepl("photo", plotdata$term)] <- "Strict Voter ID"
plotdata$policy[grepl("absvot", plotdata$term)] <- "No Fault Absentee"

plotdata <- plotdata[!is.na(plotdata$policy),]

plotdata$age_group[grepl("*:", plotdata$term)==F] <- "65+"
plotdata$age_group[grepl("18", plotdata$term)] <- "18\n-\n24"
plotdata$age_group[grepl("25", plotdata$term)] <- "25\n-\n34"
plotdata$age_group[grepl("35", plotdata$term)] <- "35\n-\n44"
plotdata$age_group[grepl("45", plotdata$term)] <- "45\n-\n54"
plotdata$age_group[grepl("55", plotdata$term)] <- "55\n-\n64"

#plotdata[,c("estimate","std.error")] <- sapply(plotdata[,c("estimate","std.error")], function(x) as.numeric(as.character(x)))
#plotdata[,c("term","age_group")] <- sapply(plotdata[,c("term","age_group")], as.character)

for(i in c("sdr","early_voting_person","anyid2","anyphotoid2")){
  
  plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+"] <-
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+"] +
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group=="65+"]
  
}

plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)

pd <- position_dodge(.3)

pdf("DiD_alllaws_plot_new_person.pdf", h=4, w=6)
ggplot(plotdata, aes(x=age_group, y=estimate, color=Model)) +
  geom_point(size=1, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, position=pd) +
  facet_wrap(~policy) +
  geom_hline(yintercept=0, linetype=2) +
  scale_y_continuous(breaks=c(-.05, 0, .05)) +
  coord_cartesian(ylim=c(-0.075,0.075)) +
  scale_color_manual(values=c("grey", "black")) +
  xlab("Age") +
  ylab("Turnout Effect") +
  theme_bw()
dev.off()















############################Grimmer et al's Cross-Sectional Placebo#######################

ever_sdr <- aggregate(sdr ~ state, data, sum, na.rm=T)
ever_sdr$ever_sdr[ever_sdr$sdr==0] <- 0
ever_sdr$ever_sdr[ever_sdr$sdr>0] <- 1

cps <- join(cps, ever_sdr[,c("state","ever_sdr")])

cps$ever_sdr[(cps$st=="OH" & cps$year>=2014) | cps$sdr==1] <- NA

logit_1 <- lm(voted ~ ever_sdr*age_group18.24 + 
                ever_sdr*age_group25.34 + 
                ever_sdr*age_group35.44 + 
                ever_sdr*age_group45.54 + 
                ever_sdr*age_group55.64 + 
                #race + sex + faminc + factor(educ) + 
                presidential_year + 
                factor(year),
              #clusters=state, se_type="stata",
              cps)

logit_2 <- lm(voted ~ ever_sdr*age_group18.24 + 
                ever_sdr*age_group25.34 + 
                ever_sdr*age_group35.44 + 
                ever_sdr*age_group45.54 + 
                ever_sdr*age_group55.64 + 
                factor(race) + sex + faminc + factor(educ) + 
                presidential_year + 
                factor(year),
              #clusters=state, se_type="stata",
              cps)

clusters_1 <- as.character(expand.model.frame(logit_1, ~st, na.expand = T)$st)
clusters_2 <- as.character(expand.model.frame(logit_2, ~st, na.expand = T)$st)

fit_1_r <- commarobust(logit_1, clusters=clusters_1, se_type = "stata")
fit_2_r <- commarobust(logit_2, clusters=clusters_2, se_type = "stata")

stargazer(logit_1, logit_2,
          se = starprep(fit_1_r, fit_2_r),
          out="crosssectional_cps_OLS_PLACEBO.html", type="html", 
          omit = c("year","race"),
          omit.labels = c("Race Dummies", "Year FEs"),
          star.cutoffs = c(0.05,0.01,0.001))



logit_1 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                #race + sex + faminc + factor(educ) + 
                presidential_year + 
                factor(year),
              #clusters=state, se_type="stata",
              cps)

logit_2 <- lm(voted ~ sdr*age_group18.24 + 
                sdr*age_group25.34 + 
                sdr*age_group35.44 + 
                sdr*age_group45.54 + 
                sdr*age_group55.64 + 
                factor(race) + sex + faminc + factor(educ) + 
                presidential_year + 
                factor(year),
              #clusters=state, se_type="stata",
              cps)

clusters_1 <- as.character(expand.model.frame(logit_1, ~st, na.expand = T)$st)
clusters_2 <- as.character(expand.model.frame(logit_2, ~st, na.expand = T)$st)

fit_1_r <- commarobust(logit_1, clusters=clusters_1, se_type = "stata")
fit_2_r <- commarobust(logit_2, clusters=clusters_2, se_type = "stata")

stargazer(logit_1, logit_2,
          se = starprep(fit_1_r, fit_2_r),
          out="crosssectional_cps_OLS_new.html", type="html", 
          omit = c("year","race"),
          omit.labels = c("Race Dummies", "Year FEs"),
          star.cutoffs = c(0.05,0.01,0.001))

















###########Granger Test########

agg_cps_age <- aggregate(voted ~ state + year + sdr + presidential_year + age_group, cps, mean, na.rm=T)

agg_cps_age <- agg_cps_age[order(agg_cps_age$state, agg_cps_age$year),]

firstyears <- do.call("rbind", as.list(by(agg_cps_age[agg_cps_age$sdr==1,], agg_cps_age$state[agg_cps_age$sdr==1], 
                                          head, n=1)))
firstyears$first_sdr_year <- firstyears$year

agg_cps_age <- join(agg_cps_age, firstyears[,c("state", "first_sdr_year")])

agg_cps_age$treat_year <- agg_cps_age$year - agg_cps_age$first_sdr_year



###Event Study
agg_cps_age$ever_treated[is.na(agg_cps_age$treat_year)] <- 0
agg_cps_age$ever_treated[!is.na(agg_cps_age$treat_year)] <- 1

agg_cps_age$treat_year[is.na(agg_cps_age$treat_year)] <- 0

cps_event <- join(cps, agg_cps_age[agg_cps_age$age_group=="18-24",c("state", "year", "treat_year", "ever_treated")])
gc()

cps_event$treat_year <- as.character(cps_event$treat_year)
cps_event$treat_year_num <- as.numeric(cps_event$treat_year)

#cps_event$treat_year[cps_event$treat_year_num%%4 != 0] <- NA


cps_event$treat_year <- relevel(factor(cps_event$treat_year), ref = 6)

output <- list()

for(i in levels(factor(cps_event$age_group))){
  
  cat(i, "\n")
  
  temp <- estimatr::tidy(lm_robust(voted ~ treat_year + 
                                     factor(race) + sex + faminc + educ + 
                                     #presidential_year +
                                     factor(year) + factor(state), 
                                   data=cps_event[cps_event$age_group==i & (cps_event$treat_year_num>=-16 & cps_event$treat_year_num<=16)
                                                  ,]),
                         clusters=state, se_type="stata")
  
  temp$age_group <- i
  temp <- temp[grepl("treat_year", temp$term),]
  temp_baseline <- temp[1,]
  temp_baseline$term <- "treat_year-2"
  temp_baseline$estimate <- 0
  temp_baseline$std.error <- NA
  temp <- rbind(temp, temp_baseline)
  temp$model <- "Controls"
  
  
  temp_bv <- estimatr::tidy(lm_robust(voted ~ treat_year + 
                                     #factor(race) + sex + faminc + educ + 
                                     #presidential_year +
                                     factor(year) + factor(state), 
                                   data=cps_event[cps_event$age_group==i & (cps_event$treat_year_num>=-16 & cps_event$treat_year_num<=16)
                                                  ,]),
                         clusters=state, se_type="stata")
  
  temp_bv$age_group <- i
  temp_bv <- temp_bv[grepl("treat_year", temp_bv$term),]
  temp_baseline <- temp_bv[1,]
  temp_baseline$term <- "treat_year-2"
  temp_baseline$estimate <- 0
  temp_baseline$std.error <- NA
  temp_bv <- rbind(temp_bv, temp_baseline)
  temp_bv$model <- "Bivariate"
  
  #temp$std.error[is.na(temp$std.error)] <- output[[1]]$std.error
  
  output[[i]] <- rbind(temp, temp_bv)

}

plotdata <- do.call(rbind, output)


for(i in levels(factor(plotdata$age_group))){
  
  if(length(plotdata$std.error[plotdata$model=="Controls" & plotdata$age_group==i & !is.nan(plotdata$std.error)])==1){
    plotdata$std.error[plotdata$model=="Controls" & is.na(plotdata$std.error) & plotdata$age_group==i] <- 
      plotdata$std.error[plotdata$model=="Controls" & plotdata$age_group=="45-54"]
  }
  
}

plotdata$term_clean <- as.numeric(as.character(gsub("treat_year", "", plotdata$term)))
plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)


lines_pre <- aggregate(estimate ~ age_group + model, plotdata[plotdata$term_clean<0,], mean, na.rm=T)
lines_post <- aggregate(estimate ~ age_group + model, plotdata[plotdata$term_clean>=0,], mean, na.rm=T)



pdf("eventstudy_controls.pdf", h=5, w=8)
ggplot(plotdata[plotdata$model=="Controls",], aes(x=term_clean, y=estimate)) +
  geom_ribbon(aes(ymin=cilow, ymax=cihigh), fill="grey80") +
  #geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, color="grey") +
  #geom_point() +
  geom_line() +
  geom_segment(data=lines_pre[lines_post$model=="Controls",], aes(x = -16, y = estimate, xend = 0, yend = estimate)) +
  geom_segment(data=lines_post[lines_post$model=="Controls",], aes(x = 0, y = estimate, xend = 16, yend = estimate)) +
  facet_wrap(~age_group) +
  scale_x_continuous(breaks=c(-16,-12,-8,-4,0,4,8,12,16)) +
  scale_y_continuous(breaks=c(-0.1,-0.05,0,0.05,0.1), limits=c(-.1,.1)) +
  geom_hline(yintercept=0, linetype=2) +
  geom_vline(xintercept=0, linetype=2) +
  xlab("Years Until SDR Implementation") +
  ylab("Treatment Effect") +
  theme_classic()
dev.off()

pdf("eventstudy_bivariate.pdf", h=5, w=8)
ggplot(plotdata[plotdata$model=="Bivariate",], aes(x=term_clean, y=estimate)) +
  geom_ribbon(aes(ymin=cilow, ymax=cihigh), fill="grey80") +
  #geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, color="grey") +
  #geom_point() +
  geom_line() +
  geom_segment(data=lines_pre[lines_post$model=="Bivariate",], aes(x = -16, y = estimate, xend = 0, yend = estimate)) +
  geom_segment(data=lines_post[lines_post$model=="Bivariate",], aes(x = 0, y = estimate, xend = 16, yend = estimate)) +
  facet_wrap(~age_group) +
  scale_x_continuous(breaks=c(-16,-12,-8,-4,0,4,8,12,16)) +
  scale_y_continuous(breaks=c(-0.1,-0.05,0,0.05,0.1), limits=c(-.1,.1)) +
  geom_hline(yintercept=0, linetype=2) +
  geom_vline(xintercept=0, linetype=2) +
  xlab("Years Until SDR Implementation") +
  ylab("Treatment Effect") +
  theme_classic()
dev.off()


#Back to Granger plot

agg_cps_age <- agg_cps_age %>% 
  mutate(group_id = group_indices_(agg_cps_age, .dots=c("state", "age_group")))

agg_cps_age <- agg_cps_age[order(agg_cps_age$state, agg_cps_age$year),]

agg_cps_age <- slide(agg_cps_age, Var = "sdr", slideBy = 2, GroupVar="group_id", NewVar = "sdr_lead_4")
agg_cps_age <- slide(agg_cps_age, Var = "sdr", slideBy = 4, GroupVar="group_id", NewVar = "sdr_lead_8")
agg_cps_age <- slide(agg_cps_age, Var = "sdr", slideBy = 6, GroupVar="group_id", NewVar = "sdr_lead_12")

agg_cps_age <- slide(agg_cps_age, Var = "sdr", slideBy = -1, GroupVar="group_id", NewVar = "sdr_lag_4")


ols_granger1 <- lm(voted ~ sdr + sdr_lead_4 + factor(state) + factor(year), agg_cps_age[agg_cps_age$age_group=="18-24",])
ols_granger2 <- lm(voted ~ sdr + sdr_lead_4 + sdr_lead_8 + factor(state) + factor(year), agg_cps_age[agg_cps_age$age_group=="18-24",])
ols_granger3 <- lm(voted ~ sdr + sdr_lead_4 + sdr_lead_8 + sdr_lead_12 + factor(state) + factor(year), agg_cps_age[agg_cps_age$age_group=="18-24",])


plotdata <- tidy(ols_granger3)
plotdata <- plotdata[grepl("sdr", plotdata$term),]
plotdata$treat_year <- c(0, -4, -8, -12)
plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)
plotdata[,c("estimate","cihigh","cilow")] <- plotdata[,c("estimate","cihigh","cilow")]*100


pdf("granger_plot.pdf", h=4, w=4)
ggplot(plotdata, aes(x=treat_year, estimate)) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0) +
  geom_hline(yintercept=0, linetype=2) +
  scale_x_continuous(breaks=c(-12,-8,-4,0)) +
  xlab("Years to SDR") +
  ylab("Turnout") +
  theme_classic()
dev.off()

#ols_granger4 <- lm(voted ~ sdr + sdr_lead_4 + sdr_lead_8 + sdr_lag_4 + sdr_lag_8 + factor(state) + factor(year), agg_cps_age[agg_cps_age$age_group=="18-24",])

stargazer(ols_granger1, ols_granger2, out="granger_diffindiff_cps.html", type="html", 
          omit = c("state", "year"),
          omit.labels = c("State FEs", "Year FEs"),
          star.cutoffs = c(0.05,0.01,0.001))

rm(list=ls(pattern="logit_"))
rm(list=ls(pattern="ols_")); gc()






######Lagged DV Model#####

agg_cps_age <- aggregate(voted ~ state + year + sdr + presidential_year + age_group, cps, mean, na.rm=T)

agg_cps_age <- agg_cps_age %>% 
  mutate(group_id = group_indices_(agg_cps_age, .dots=c("state", "age_group")))


agg_cps_age <- agg_cps_age[order(agg_cps_age$group_id, agg_cps_age$year),]
agg_cps_age <- slide(agg_cps_age, Var = "voted", slideBy = -4, GroupVar="group_id", NewVar = "voted_lag")
agg_cps_age <- slide(agg_cps_age, Var = "voted", slideBy = -2, GroupVar="group_id", NewVar = "voted_lag2")


output <- list()

for(j in c("sdr")){
  
  agg_cps_age$temp_var <- agg_cps_age[,j]
  
  output_lagged <- list()
 
  for(i in levels(factor(agg_cps_age$age_group))){
    
    cat(i, j, "\n")
    
    tempdata <- agg_cps_age[agg_cps_age$age_group==i,]
    
    tempdata$state <- as.character(tempdata$state)
    
    temp <- tidy(lm(voted ~ temp_var + presidential_year + voted_lag, tempdata))
    cluster_se_temp <- as.numeric(commarobust(lm(voted ~ temp_var + presidential_year + voted_lag, tempdata),
                                              clusters=tempdata$state[!is.na(tempdata$voted_lag)])$std.error)
    temp$std.error <- cluster_se_temp[!is.na(cluster_se_temp)]
    
    temp$group <- i
    temp$term[temp$term=="temp_var"] <- j
    temp$model <- "Lagged DV"
    output_lagged[[i]] <- temp[temp$term==j,]
    
  }
  
  output[[j]] <- do.call(rbind, output_lagged)
  
}

plotdata <- do.call(rbind.fill, c(output))

plotdata$term[plotdata$term=="sdr"] <- "SDR"

plotdata[,c("estimate","std.error")] <- round(plotdata[,c("estimate","std.error")], digits=3)

write.csv(plotdata[,c("term","estimate","std.error","group")], "lagged_dv_estimates.csv")


######DiD: Registering to Vote#######

agg_cps_age <- aggregate(vote_reg ~ state + year + sdr + presidential_year + age_group + 
                           povrate + pct_black_i + pct_white_i, cps, mean, na.rm=T)


output_lagged <- list()

for(i in levels(factor(agg_cps_age$age_group))){
  
  cat(i, "\n")
  
  tempdata <- agg_cps_age[agg_cps_age$age_group==i,]
  
  tempdata$state <- as.character(tempdata$state)
  
  temp <- estimatr::tidy(lm_robust(vote_reg ~ sdr + presidential_year + factor(state) + 
                                     pct_black_i + pct_white_i + povrate, 
                                   tempdata, cluster=state,
                                   se_type="stata"))
  temp$group <- i
  output_lagged[[i]] <- temp
  
}

did_output <- do.call(rbind, output_lagged)
did_output$cilow <- did_output$conf.low
did_output$cihigh <- did_output$conf.high


did_output$age_group[grepl("65", did_output$group)] <- "65+"
did_output$age_group[grepl("18", did_output$group)] <- "18\n-\n24"
did_output$age_group[grepl("25", did_output$group)] <- "25\n-\n34"
did_output$age_group[grepl("35", did_output$group)] <- "35\n-\n44"
did_output$age_group[grepl("45", did_output$group)] <- "45\n-\n54"
did_output$age_group[grepl("55", did_output$group)] <- "55\n-\n64"

#DV = Registered to vote

model_1 <- lm_robust(vote_reg ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       #factor(race) + sex + faminc + educ + presidential_year + 
                       factor(state) + factor(year), 
                     cps,
                     clusters = st,
                     se_type = "stata")

model_1 <- lm_robust(vote_reg ~ sdr*age_group18.24 + 
                       sdr*age_group25.34 + 
                       sdr*age_group35.44 + 
                       sdr*age_group45.54 + 
                       sdr*age_group55.64 + 
                       factor(race) + sex + faminc + educ + presidential_year + factor(year), 
                     cps,
                     clusters = st,
                     se_type = "stata")
plotdata <- estimatr::tidy(model_1)

plotdata$age_group[grepl("*:", plotdata$term)==F] <- "65+"
plotdata$age_group[grepl("18", plotdata$term)] <- "18\n-\n24"
plotdata$age_group[grepl("25", plotdata$term)] <- "25\n-\n34"
plotdata$age_group[grepl("35", plotdata$term)] <- "35\n-\n44"
plotdata$age_group[grepl("45", plotdata$term)] <- "45\n-\n54"
plotdata$age_group[grepl("55", plotdata$term)] <- "55\n-\n64"

plotdata[,c("estimate","std.error")] <- sapply(plotdata[,c("estimate","std.error")], function(x) as.numeric(as.character(x)))
plotdata[,c("term","age_group")] <- sapply(plotdata[,c("term","age_group")], as.character)


for(i in c("sdr")){
  
  plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+"] <-
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group!="65+"] +
    plotdata$estimate[grepl(i, plotdata$term) & plotdata$age_group=="65+"]
  
}

plotdata$cilow <- plotdata$estimate - (1.96*plotdata$std.error)
plotdata$cihigh <- plotdata$estimate + (1.96*plotdata$std.error)

plotdata$policy[grepl("sdr", plotdata$term)] <- "SDR"
plotdata <- plotdata[!is.na(plotdata$policy),]

plotdata$Model <- "Individual TSCS"
did_output$Model <- "Aggregate DiD"

plotdata <- rbind.fill(plotdata, did_output[grepl("sdr",did_output$term),])




pd <- position_dodge(.2)

pdf("votereg_plot.pdf", h=4, w=5)
ggplot(plotdata, aes(x=age_group, y=estimate, shape=Model, color=Model)) +
  geom_point(size=1.5, position=pd) +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, position=pd) +
  #facet_wrap(~policy) +
  geom_hline(yintercept=0, linetype=2) +
  scale_color_manual(values=c("black","grey")) +
  #scale_y_continuous(breaks=c(-.05, 0, .05, .1), limits=c(-.05, .1)) +
  xlab("Age") +
  ylab("SDR Effect on Registering to Vote") +
  theme_bw()
dev.off()



###########CPS Correlation Matrix###################


cps$sdr_age_group18.24 <- cps$sdr*cps$age_group18.24
cps$sdr_age_group25.34 <- cps$sdr*cps$age_group25.34
cps$sdr_age_group35.44 <- cps$sdr*cps$age_group35.44
cps$sdr_age_group45.54 <- cps$sdr*cps$age_group45.54
cps$sdr_age_group55.64 <- cps$sdr*cps$age_group55.64
cps$sdr_age_group65. <- cps$sdr*cps$age_group65.

cor_matrix <- cor(cps[,c("sdr", "age_group18.24", 
                         "age_group25.34", 
                         "age_group35.44", 
                         "age_group45.54", 
                         "age_group55.64",
                         "age_group65.",
                         "sdr_age_group18.24",
                         "sdr_age_group25.34",
                         "sdr_age_group45.54",
                         "sdr_age_group55.64",
                         "sdr_age_group65.",
                         "race", "sex", "faminc", "educ", "presidential_year", "year")], 
                  use="complete.obs")


write.csv(round(cor_matrix, 2), "correlation_matrix.csv", na="")











############################################################################
###################Policy Opinion by Age (Figure A10)######################################
############################################################################

cces <- read.dta("../cces_sdr_final.dta", convert.factors = "string")

cces$age_group[cces$age<=24] <- "18\n-\n24"
cces$age_group[cces$age>24 & cces$age<35] <- "25\n-\n34"
cces$age_group[cces$age>34 & cces$age<45] <- "35\n-\n44"
cces$age_group[cces$age>44 & cces$age<55] <- "45\n-\n54"
cces$age_group[cces$age>54 & cces$age<65] <- "55\n-\n64"
cces$age_group[cces$age>64] <- "65+"

output <- list()

for(i in c("minimumwage", "capntrade", "bangaymarriage", "schip", "abortionyesno", "partialbirth", "immigrantcitizenship", "capitalgains", "guncontrol", "affirmativeaction", "acaopinion")){
  
  cces$tempvar <- cces[,i]
  
  tempdata <- model.frame(tempvar ~ age_group + race + famincome + female + education + state_pre + year, cces)
  
  preds <- predict(glm(tempvar ~ age_group + factor(race) + famincome + female + education + factor(state_pre), tempdata,
                       family=binomial(link="logit")), se.fit=T, type="response")
  
  tempdata$pred_prob <- preds$fit
  tempdata$pred_se <- preds$se.fit
  
  temp_out <- aggregate(cbind(pred_prob, pred_se) ~ age_group, tempdata, mean, na.rm=T)
  temp_out$variable <- i
  
  output[[i]] <- temp_out
  
}

plotdata <- do.call(rbind, output)

plotdata$variable_full[plotdata$variable=="minimumwage"] <- "Increase\nMin. Wage"
plotdata$variable_full[plotdata$variable=="capntrade"] <- "Cap and Trade"
plotdata$variable_full[plotdata$variable=="bangaymarriage"] <- "Ban Gay\nMarriage"
plotdata$variable_full[plotdata$variable=="schip"] <- "SCHIP"
plotdata$variable_full[plotdata$variable=="abortionyesno"] <- "Legal Abortion"
plotdata$variable_full[plotdata$variable=="partialbirth"] <- "Ban Partial\nBirth Abortion"
plotdata$variable_full[plotdata$variable=="immigrantcitizenship"] <- "Path to Citizenship"
plotdata$variable_full[plotdata$variable=="capitalgains"] <- "Capital Gains\nTax Exemption"
plotdata$variable_full[plotdata$variable=="guncontrol"] <- "Stricter Gun Control"
plotdata$variable_full[plotdata$variable=="affirmativeaction"] <- "Affirmative Action"
plotdata$variable_full[plotdata$variable=="acaopinion"] <- "Affordable Care Act"

plotdata$cilow <- plotdata$pred_prob - (1.96*plotdata$pred_se)
plotdata$cihigh <- plotdata$pred_prob + (1.96*plotdata$pred_se)

pdf("policy_attitudes_byage_cces.pdf", h=7, w=7)
ggplot(plotdata, aes(x=age_group, y=pred_prob)) +
  geom_point() +
  geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0) +
  facet_wrap(~variable_full, ncol=3) +
  xlab("Age Group") +
  ylab("Predicted Policy Support") +
  scale_y_continuous(limits=c(.2,.8), breaks=c(.25,.5,.75)) +
  theme_bw()
dev.off()


rm(cces); gc()













###############################################################################################

###################Matching (Table A5 and Table A6)####################     Note: Runtime is extremely long

setwd("./Matching Output")

############################################################################
rm(list=ls(pattern="logit_"))
rm(list=ls(pattern="ols_")); gc()

variables <- c("sdr", "voted", "age_group", "age", "sex", "faminc", "educ", "race.100", "race.200", "race.300","race.650","race.651","race.652",
               "race.700","race.801","race.802","race.803","race.804","race.805","race.806",
               "race.807","race.808","race.809","race.810","race.811","race.812","race.813",
               "race.814","race.815","race.816","race.817","race.818","race.819","race.820","race.830")

exact_vars <- c(FALSE, FALSE, FALSE,
                TRUE, TRUE, TRUE,
                TRUE,TRUE,TRUE,
                TRUE, TRUE)

cps <- cps[,names(cps) %in% 
             c("serial", "month", "county", "wtfinl", "state", "pernum", "cpsid")==F]; gc()


##By Age Group

for(i in levels(factor(cps$age_group))[6]){
  
  for(y in levels(factor(cps$year[cps$year>=1982]))){
    
    cat(i, " ", y, "\n")
    
    cps_temp <- cps[cps$age_group==i & !is.na(cps$sdr) & !is.na(cps$voted) & !is.na(cps$race) &
                      !is.na(cps$year) & !is.na(cps$faminc) & !is.na(cps$educ) & 
                      !is.na(cps$age) & !is.na(cps$sex) &
                      cps$year==as.integer(y),]
    gc()
    
    gen = GenMatch(Tr=cps_temp$sdr,X=with(cps_temp,cbind(faminc, educ, age, 
                                                         sex, race.100, race.200, race.300,
                                                         race.650,race.651,race.652,
                                                         race.700#,race.801,race.802,
                                                         #race.803,race.804,race.805,
                                                         #race.806,race.807,race.808,
                                                         #race.809,race.810,race.811,
                                                         #race.812,race.813,race.814,
                                                         #race.815,race.816,race.817,
                                                         #race.818,race.819,race.820,
                                                         #race.830
    )), 
    exact=exact_vars, 
    max.generations = 100, pop.size=100, 
    #cluster=cl, 
    verbose=T)
    gc()
    
    #save(gen, file=paste("genmatch_", j, "_", y, "_18.12.8.RData", sep=""))
    
    ###Now Run the Match
    matchout = Match(Y=cps_temp$voted, Tr=cps_temp$sdr, 
                     X=with(cps_temp,cbind(faminc, educ, age, sex, 
                                           race.100, race.200, race.300,
                                           race.650,race.651,race.652,
                                           race.700#,race.801,race.802,
                                           #race.803,race.804,race.805,
                                           #race.806,race.807,race.808,
                                           #race.809,race.810,race.811,
                                           #race.812,race.813,race.814,
                                           #race.815,race.816,race.817,
                                           #race.818,race.819,race.820,
                                           #race.830
                     )), 
                     exact=exact_vars, Weight.matrix=gen,
                     ties=F)
    rm(gen); gc()
    
    j <- gsub("+","",i)
    
    save(matchout, file=paste("matchout_", j, "_", y, "_20.3.2.RData", sep=""))
    
    #saving data
    temp_treated <- cps_temp[matchout$index.treated,]
    temp_control <- cps_temp[matchout$index.control,]
    
    save(temp_treated, file=paste("data_treated_", j, "_", y, "_20.3.2.RData", sep=""))
    save(temp_control, file=paste("data_control_", j, "_", y, "_20.3.2.RData", sep=""))
    
    rm(matchout); gc() 
    
  }
  
}



########Matched Datasets#########



treated_files <- list.files(pattern="data_treated")
control_files <- list.files(pattern="data_control")

data_list <- list()

for(i in 1:length(treated_files)){
  
  load(treated_files[i])
  load(control_files[i])
  
  names(temp_control) <- paste(names(temp_control), "_c", sep="")
  names(temp_treated) <- paste(names(temp_treated), "_t", sep="")
  
  data_list[[i]] <- cbind(temp_control, temp_treated)
  
  rm(temp_control, temp_treated); gc()
}

matched_data <- do.call(rbind, data_list)



balance_vars <- c("age_t", "faminc_t", "educ_t", "sex_t", "race.100_t", "race.200_t", "race.300_t", "race.650_t", "race.651_t", 
                  "race.652_t", "race.700_t", "race.801_t", "race.802_t", "race.803_t", "race.804_t", "race.805_t",
                  "race.806_t", "race.807_t", "race.808_t", "race.809_t", "race.810_t", "race.811_t", "race.812_t", 
                  "race.813_t","race.814_t", "race.815_t", "race.816_t", "race.817_t", "race.818_t", "race.819_t",
                  "race.820_t","race.830_t")

output_list <- list()
balance_list <- list()

diff_in_effects_check <- t.test((matched_data$voted_t[matched_data$age_group_t=="18-24"]-matched_data$voted_c[matched_data$age_group_c=="18-24"]), 
                                (matched_data$voted_t[matched_data$age_group_t=="35-44"] - matched_data$voted_c[matched_data$age_group_c=="35-44"]))

diff_in_effects_check2 <- t.test((matched_data$voted_t[matched_data$age_group_t=="25-34"]-matched_data$voted_c[matched_data$age_group_c=="25-34"]), 
                                 (matched_data$voted_t[matched_data$age_group_t=="35-44"] - matched_data$voted_c[matched_data$age_group_c=="35-44"]))

for(i in levels(factor(matched_data$age_group_c))){
  
  temp <- matched_data[matched_data$age_group_c==i & matched_data$age_group_t==i,]
  
  temp_est <- data.frame(rbind(as.numeric(unlist(t.test(temp$voted_t, temp$voted_c, paired=T)[c("conf.int")]))))
  temp_est[,3] <- as.numeric(t.test(temp$voted_t, temp$voted_c, paired=T)[c("estimate")])
  
  names(temp_est) <- c("cilow", "cihigh", "estimate")
  temp_est$age_group <- i
  
  output_list[[i]] <- temp_est
  
  balance_j <- list()
  
  for(j in balance_vars){
    
    temp_balance <- data.frame(cbind(mean(temp[,j], na.rm=T), mean(temp[,gsub("_t", "_c", j)], na.rm=T),
                                     t.test(temp[,j], temp[,gsub("_t", "_c", j)], paired=T)$p.val), 
                               gsub("_t", "", j), i)
    
    names(temp_balance) <- c("mean_treated", "mean_control", "pval", "variable", "age_group")
    
    balance_j[[j]] <- temp_balance
    
  }
  
  balance_list[[i]] <- do.call(rbind, balance_j)
  
}

estimates <- do.call(rbind, output_list)
write.csv(estimates, "matching_estimates.csv", na="")


balance <- do.call(rbind, balance_list)

balance[,c("mean_treated","mean_control","pval")] <- round(balance[,c("mean_treated","mean_control","pval")], digits=3)
balance$pval[is.nan(balance$pval)] <- 1
balance <- balance[grepl("race.8",balance$variable)==F,]

write.csv(balance, "matching_balance_estimates.csv", na="", row.names = F)

pd <- position_dodge(.5)

pdf("balance_plot.pdf", h=5, w=5)
ggplot(balance[balance$age_group=="18-24",], aes(x=variable, y=pval)) +
  #geom_point(data=data[data$year>=1980 & !is.na(data$sdr),], aes(x=factor(sdr), y=vep.highest.office), alpha=.03) +
  geom_point(size=1) +
  #geom_errorbar(aes(ymin=cilow, ymax=cihigh), width=0, size=1, position=pd) +
  #facet_wrap(~term) +
  #scale_y_continuous(breaks=c(0,5,10,15), limits=c(0,15)) +
  #scale_x_discrete(breaks=c(0,1)) +
  #scale_color_manual(values=c("black", "grey", "grey"), guide=F) +
  geom_hline(yintercept=0, linetype=2) +
  coord_flip() +
  ylab("T-Test P-Value") +
  xlab("Matching Variable") +
  theme_bw()
dev.off()







#################Grimmer et al Matching Placebo Test (Table A7)#################
setwd("./placebo")


rm(list=ls(pattern="indiv"))
rm(list=ls(pattern="mlm"))
gc()

ever_sdr <- aggregate(sdr ~ state, data, sum, na.rm=T)
ever_sdr$ever_sdr[ever_sdr$sdr==0] <- 0
ever_sdr$ever_sdr[ever_sdr$sdr>0] <- 1

cps <- join(cps, ever_sdr[,c("state","ever_sdr")])

cps$ever_sdr[(cps$st=="OH" & cps$year>=2014) | cps$sdr==1] <- NA


cem_variables <- c("ever_sdr", "voted", "age_group", "age", "sex", "faminc", "educ",
                   "race.100", "race.200", "race.300", "year", "presidential_year")

cem_data <- cps[, cem_variables]

cem_data <- cem_data[complete.cases(cem_data),]

qm_temp <- cem_data[cem_data$age_group=="18-24", (names(cem_data) %in% c("age_group"))==F]

output <- list()

for(i in levels(factor(cem_data$age_group))){
  
  qm_temp <- cem_data[cem_data$age_group==i, (names(cem_data) %in% c("age_group"))==F]
  
  qm1 <- quickmatch(qm_temp, treatments=qm_temp$ever_sdr)
  out1 <- lm_match(qm_temp$voted, qm_temp$ever_sdr, qm1)
  temp <- data.frame(cbind(out1$effects[2,1], out1$effect_variances[2,1], i))
  names(temp) <- c("estimate", "se", "age_group")
  output[[i]] <- temp
  
}

plotdata <- do.call(rbind, output)

plotdata$estimate <- as.numeric(as.character(plotdata$estimate))
plotdata$se <- sqrt(as.numeric(as.character(plotdata$se)))

plotdata$cilow <- plotdata$estimate - 1.96*plotdata$se
plotdata$cihigh <- plotdata$estimate + 1.96*plotdata$se
plotdata$age_group_lb <- c("18\n-\n24","25\n-\n34","35\n-\n44","45\n-\n54","55\n-\n64",  "65+")

write.csv(plotdata, "matching_placebo_estimates.csv", na="")





#########Run it############
rm(list=ls(pattern="logit_"))
rm(list=ls(pattern="ols_")); gc()

variables <- c("sdr", "voted", "age_group", "age", "sex", "faminc", "educ", "race.100", "race.200", "race.300","race.650","race.651","race.652",
               "race.700","race.801","race.802","race.803","race.804","race.805","race.806",
               "race.807","race.808","race.809","race.810","race.811","race.812","race.813",
               "race.814","race.815","race.816","race.817","race.818","race.819","race.820","race.830",
               "ever_sdr")

exact_vars <- c(FALSE, FALSE, FALSE,
                TRUE, TRUE, TRUE,
                TRUE,TRUE,TRUE,
                TRUE, TRUE)

cps <- cps[,names(cps) %in% 
             c("serial", "month", "county", "wtfinl", "state", "pernum", "cpsid")==F]; gc()


##By Age Group

for(i in levels(factor(cps$age_group))[3:4]){
  
  for(y in levels(factor(cps$year[cps$year>=1982]))){
    
    cat(i, " ", y, "\n")
    
    cps_temp <- cps[cps$age_group==i & !is.na(cps$ever_sdr) & !is.na(cps$voted) & !is.na(cps$race) &
                      !is.na(cps$year) & !is.na(cps$faminc) & !is.na(cps$educ) & 
                      !is.na(cps$age) & !is.na(cps$sex) &
                      cps$year==as.integer(y),]
    gc()
    
    gen = GenMatch(Tr=cps_temp$ever_sdr,X=with(cps_temp,cbind(faminc, educ, age, 
                                                              sex, race.100, race.200, race.300,
                                                              race.650,race.651,race.652,
                                                              race.700#,race.801,race.802,
                                                              #race.803,race.804,race.805,
                                                              #race.806,race.807,race.808,
                                                              #race.809,race.810,race.811,
                                                              #race.812,race.813,race.814,
                                                              #race.815,race.816,race.817,
                                                              #race.818,race.819,race.820,
                                                              #race.830
    )), 
    exact=exact_vars, 
    max.generations = 20, pop.size=20, 
    #cluster=cl, 
    verbose=T)
    gc()
    
    #save(gen, file=paste("genmatch_", j, "_", y, "_18.12.8.RData", sep=""))
    
    ###Now Run the Match
    matchout = Match(Y=cps_temp$voted, Tr=cps_temp$ever_sdr, 
                     X=with(cps_temp,cbind(faminc, educ, age, sex, 
                                           race.100, race.200, race.300,
                                           race.650,race.651,race.652,
                                           race.700#,race.801,race.802,
                                           #race.803,race.804,race.805,
                                           #race.806,race.807,race.808,
                                           #race.809,race.810,race.811,
                                           #race.812,race.813,race.814,
                                           #race.815,race.816,race.817,
                                           #race.818,race.819,race.820,
                                           #race.830
                     )), 
                     exact=exact_vars, Weight.matrix=gen,
                     ties=F)
    rm(gen); gc()
    
    j <- gsub("+","",i)
    
    save(matchout, file=paste("placeboout_", j, "_", y, "_18.12.9.RData", sep=""))
    
    #saving data
    temp_treated <- cps_temp[matchout$index.treated,]
    temp_control <- cps_temp[matchout$index.control,]
    
    save(temp_treated, file=paste("placebo_treated_", j, "_", y, "_18.12.9.RData", sep=""))
    save(temp_control, file=paste("placebo_control_", j, "_", y, "_18.12.9.RData", sep=""))
    
    rm(matchout); gc() 
    
  }
  
}



########Matched Datasets#########


treated_files <- list.files(pattern="placebo_treated")
control_files <- list.files(pattern="placebo_control")

data_list <- list()

for(i in 1:length(treated_files)){
  
  load(treated_files[i])
  load(control_files[i])
  
  names(temp_control) <- paste(names(temp_control), "_c", sep="")
  names(temp_treated) <- paste(names(temp_treated), "_t", sep="")
  
  data_list[[i]] <- cbind(temp_control, temp_treated)
  
  rm(temp_control, temp_treated); gc()
}

matched_data <- do.call(rbind.fill, data_list)



balance_vars <- c("age_t", "faminc_t", "educ_t", "sex_t", "race.100_t", "race.200_t", "race.300_t", "race.650_t", "race.651_t", 
                  "race.652_t", "race.700_t", "race.801_t", "race.802_t", "race.803_t", "race.804_t", "race.805_t",
                  "race.806_t", "race.807_t", "race.808_t", "race.809_t", "race.810_t", "race.811_t", "race.812_t", 
                  "race.813_t","race.814_t", "race.815_t", "race.816_t", "race.817_t", "race.818_t", "race.819_t",
                  "race.820_t","race.830_t")

output_list <- list()
balance_list <- list()

for(i in levels(factor(matched_data$age_group_c))){
  
  temp <- matched_data[matched_data$age_group_c==i & matched_data$age_group_t==i,]
  
  temp_est <- data.frame(rbind(as.numeric(unlist(t.test(temp$voted_t, temp$voted_c, paired=T)[c("conf.int")]))))
  temp_est[,3] <- as.numeric(t.test(temp$voted_t, temp$voted_c, paired=T)[c("estimate")])
  
  names(temp_est) <- c("cilow", "cihigh", "estimate")
  temp_est$age_group <- i
  
  output_list[[i]] <- temp_est
  
  balance_j <- list()
  
  for(j in balance_vars){
    
    temp_balance <- data.frame(cbind(mean(temp[,j], na.rm=T), mean(temp[,gsub("_t", "_c", j)], na.rm=T),
                                     t.test(temp[,j], temp[,gsub("_t", "_c", j)], paired=T)$p.val), 
                               gsub("_t", "", j), i)
    
    names(temp_balance) <- c("mean_treated", "mean_control", "pval", "variable", "age_group")
    
    balance_j[[j]] <- temp_balance
    
  }
  
  balance_list[[i]] <- do.call(rbind, balance_j)
  
}

estimates <- do.call(rbind, output_list)
write.csv(estimates, "placebo_estimates.csv", na="")


balance <- do.call(rbind, balance_list)

balance$pval[is.nan(balance$pval)] <- 1

write.csv(balance, "placebo_balance_estimates.csv", na="")

pd <- position_dodge(.5)



########By year/age group####
estimates_list <- list()
balance_list <- list()
data_list_treated <- list()
data_list_control <- list()

for(i in list.files(pattern="matchout")){
  
  load(i)
  
  temp_out <- data.frame(cbind(matchout$est, matchout$se.standard, matchout$nobs, i))
  names(temp_out) <- c("estimate", "se", "nobs", "file")
  estimates_list[[i]] <- temp_out
  
  mb <- MatchBalance(sdr ~ faminc + age + sex +
                       race.100 +  race.200 +  race.300 +
                       race.650 + race.651 + race.652 +
                       race.700# + race.801 + race.802 +
                     #race.803 + race.804 + race.805 +
                     #race.806 + race.807 + race.808 +
                     #race.809 + race.810 + race.811 +
                     #race.812 + race.813 + race.814 +
                     #race.815 + race.816 + race.817 +
                     #race.818 + race.819 + race.820 +
                     #race.830
                     , data=cps, match.out=matchout)
  
  gc()
  
  bm <- mb$BeforeMatching
  am <- mb$AfterMatching
  
  balance_b <- list()
  
  for(b in seq_along(c("faminc","age","sex","race.100","race.200","race.300","race.650",
                       "race.651","race.652","race.700"))){
    
    temp_bm <- bm[[b]]
    temp_am <- am[[b]]
    temp_b <- data.frame(cbind(temp_bm$mean.Co, temp_bm$mean.Tr, temp_bm$p.value,
                               temp_am$mean.Co, temp_am$mean.Tr, temp_am$p.value,
                               c("faminc","age","sex","race.100","race.200","race.300","race.650",
                                 "race.651","race.652","race.700")[b],
                               i))
    names(temp_b) <- c("bm_control", "bm_treatment", "bm_pval",
                       "am_control", "am_treatment", "am_pval",
                       "group", "file")
    
    balance_b[[b]] <- temp_b
    
  }
  
  balance_list[[i]] <- do.call(rbind, balance_b)
  
  data_list_treated[[i]] <- cps[matchout$index.treated,variables]
  data_list_control[[i]] <- cps[matchout$index.control,variables]
  
  rm(matchout); gc()
}

write.csv(do.call(rbind, balance_list), "balance_stats.csv", na = "", row.names = F)
write.csv(do.call(rbind, estimates_list), "matchout_estimates.csv", na = "", row.names = F)

full_treated_data <- do.call(rbind, data_list_treated); gc()
full_control_data <- do.call(rbind, data_list_control); gc()


#rm(data_list); gc()


#t.test()






##Full
gen = GenMatch(Tr=cps$sdr,X=with(cps,cbind(factor(year), factor(race), faminc, age, sex)), 
               exact=c(TRUE, TRUE, FALSE, FALSE, FALSE), 
               max.generations = 20, pop.size=100, cluster=cl, verbose=T)
gc()

###Now Run the Match
matchout = Match(Y=cps$voted, Tr=cps$sdr, 
                 X=with(cps,cbind(factor(year), factor(race), faminc, age, sex)), 
                 exact=c(TRUE, TRUE, FALSE, FALSE, FALSE), Weight.matrix=gen, ties=F)
summary(matchout)

save.image(file="genmatch_sdr_full_18.12.4.RData")

rm(gen, matchout); gc()

















